This file is for analysis of some data downloaded from The COVID Tracking Project on 20 August, 2020. This file contains data on positive tests, hospitalizations, deaths, and the like for coronavirus cases in the US. Data are unique by state and date.
The downloaded data file is read in as CSV, and the date column is converted to date format:
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.4
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Read the raw COVID Tracking Project extract (one row per state per date)
cvData <- readr::read_csv("./RInputFiles/Coronavirus/CV_downloaded_200820.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## state = col_character(),
## dataQualityGrade = col_character(),
## lastUpdateEt = col_character(),
## dateModified = col_datetime(format = ""),
## checkTimeEt = col_character(),
## dateChecked = col_datetime(format = ""),
## hash = col_character(),
## grade = col_logical()
## )
## See spec(...) for full column specifications.
# Preview the structure and first values of every column
glimpse(cvData)
## Observations: 9,449
## Variables: 53
## $ date <dbl> 20200820, 20200820, 20200820, 20200820,...
## $ state <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO...
## $ positive <dbl> 5332, 112449, 54765, 0, 196280, 644751,...
## $ negative <dbl> 307315, 784330, 593744, 1514, 922163, 9...
## $ pending <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ hospitalizedCurrently <dbl> 51, 1105, 499, NA, 1070, 6212, 238, 47,...
## $ hospitalizedCumulative <dbl> NA, 13330, 3790, NA, 21143, NA, 6784, 1...
## $ inIcuCurrently <dbl> NA, NA, NA, NA, 388, 1707, NA, NA, 26, ...
## $ inIcuCumulative <dbl> NA, 1348, NA, NA, NA, NA, NA, NA, NA, N...
## $ onVentilatorCurrently <dbl> 6, NA, 108, NA, 233, NA, NA, NA, 12, NA...
## $ onVentilatorCumulative <dbl> NA, 734, 488, NA, NA, NA, NA, NA, NA, N...
## $ recovered <dbl> 1513, 44684, 48458, NA, 28471, NA, 5759...
## $ dataQualityGrade <chr> "A", "B", "A", "C", "A+", "B", "A", "B"...
## $ lastUpdateEt <chr> "8/20/2020 0:00", "8/20/2020 11:00", "8...
## $ dateModified <dttm> 2020-08-20 00:00:00, 2020-08-20 11:00:...
## $ checkTimeEt <chr> "8/19/2020 20:00", "8/20/2020 7:00", "8...
## $ death <dbl> 29, 1974, 641, 0, 4684, 11686, 1800, 44...
## $ hospitalized <dbl> NA, 13330, 3790, NA, 21143, NA, 6784, 1...
## $ dateChecked <dttm> 2020-08-20 00:00:00, 2020-08-20 11:00:...
## $ totalTestsViral <dbl> 312647, 891813, 648509, NA, 1116897, 10...
## $ positiveTestsViral <dbl> 4970, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ negativeTestsViral <dbl> 307360, NA, 593744, NA, NA, NA, NA, NA,...
## $ positiveCasesViral <dbl> 5332, 107483, 54765, 0, 194734, 644751,...
## $ deathConfirmed <dbl> 29, 1905, NA, NA, 4429, NA, NA, 3572, N...
## $ deathProbable <dbl> NA, 69, NA, NA, 255, NA, NA, 886, NA, 6...
## $ totalTestEncountersViral <dbl> NA, NA, NA, NA, NA, NA, 895207, NA, NA,...
## $ totalTestsPeopleViral <dbl> NA, NA, NA, 1514, NA, NA, 645170, NA, 2...
## $ totalTestsAntibody <dbl> NA, NA, NA, NA, 255456, NA, 150931, NA,...
## $ positiveTestsAntibody <dbl> NA, NA, NA, NA, NA, NA, 10406, NA, NA, ...
## $ negativeTestsAntibody <dbl> NA, NA, NA, NA, NA, NA, 140525, NA, NA,...
## $ totalTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ negativeTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsPeopleAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsAntigen <dbl> NA, NA, 10358, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ fips <dbl> 2, 1, 5, 60, 4, 6, 8, 9, 11, 10, 12, 13...
## $ positiveIncrease <dbl> 85, 971, 549, 0, 723, 5920, 270, 118, 5...
## $ negativeIncrease <dbl> 1713, 10462, 6680, 0, 6481, 81363, 4657...
## $ total <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ totalTestResults <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ totalTestResultsIncrease <dbl> 1798, 11433, 7229, 0, 7204, 87283, 7348...
## $ posNeg <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ deathIncrease <dbl> 0, 30, 10, 0, 50, 163, 12, 1, 1, 0, 119...
## $ hospitalizedIncrease <dbl> 0, 250, 47, 0, 123, 0, 3, 72, 0, 0, 450...
## $ hash <chr> "c83a1d575a597788adccbe170950b8d197754b...
## $ commercialScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeRegularScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ positiveScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ score <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ grade <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
# Convert 'date' from a yyyymmdd-style double (e.g. 20200820) to a Date
cvData <- cvData %>%
mutate(date=lubridate::ymd(date))
# Confirm the conversion took effect (date is now <date>, not <dbl>)
glimpse(cvData)
## Observations: 9,449
## Variables: 53
## $ date <date> 2020-08-20, 2020-08-20, 2020-08-20, 20...
## $ state <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO...
## $ positive <dbl> 5332, 112449, 54765, 0, 196280, 644751,...
## $ negative <dbl> 307315, 784330, 593744, 1514, 922163, 9...
## $ pending <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ hospitalizedCurrently <dbl> 51, 1105, 499, NA, 1070, 6212, 238, 47,...
## $ hospitalizedCumulative <dbl> NA, 13330, 3790, NA, 21143, NA, 6784, 1...
## $ inIcuCurrently <dbl> NA, NA, NA, NA, 388, 1707, NA, NA, 26, ...
## $ inIcuCumulative <dbl> NA, 1348, NA, NA, NA, NA, NA, NA, NA, N...
## $ onVentilatorCurrently <dbl> 6, NA, 108, NA, 233, NA, NA, NA, 12, NA...
## $ onVentilatorCumulative <dbl> NA, 734, 488, NA, NA, NA, NA, NA, NA, N...
## $ recovered <dbl> 1513, 44684, 48458, NA, 28471, NA, 5759...
## $ dataQualityGrade <chr> "A", "B", "A", "C", "A+", "B", "A", "B"...
## $ lastUpdateEt <chr> "8/20/2020 0:00", "8/20/2020 11:00", "8...
## $ dateModified <dttm> 2020-08-20 00:00:00, 2020-08-20 11:00:...
## $ checkTimeEt <chr> "8/19/2020 20:00", "8/20/2020 7:00", "8...
## $ death <dbl> 29, 1974, 641, 0, 4684, 11686, 1800, 44...
## $ hospitalized <dbl> NA, 13330, 3790, NA, 21143, NA, 6784, 1...
## $ dateChecked <dttm> 2020-08-20 00:00:00, 2020-08-20 11:00:...
## $ totalTestsViral <dbl> 312647, 891813, 648509, NA, 1116897, 10...
## $ positiveTestsViral <dbl> 4970, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ negativeTestsViral <dbl> 307360, NA, 593744, NA, NA, NA, NA, NA,...
## $ positiveCasesViral <dbl> 5332, 107483, 54765, 0, 194734, 644751,...
## $ deathConfirmed <dbl> 29, 1905, NA, NA, 4429, NA, NA, 3572, N...
## $ deathProbable <dbl> NA, 69, NA, NA, 255, NA, NA, 886, NA, 6...
## $ totalTestEncountersViral <dbl> NA, NA, NA, NA, NA, NA, 895207, NA, NA,...
## $ totalTestsPeopleViral <dbl> NA, NA, NA, 1514, NA, NA, 645170, NA, 2...
## $ totalTestsAntibody <dbl> NA, NA, NA, NA, 255456, NA, 150931, NA,...
## $ positiveTestsAntibody <dbl> NA, NA, NA, NA, NA, NA, 10406, NA, NA, ...
## $ negativeTestsAntibody <dbl> NA, NA, NA, NA, NA, NA, 140525, NA, NA,...
## $ totalTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ negativeTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsPeopleAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsAntigen <dbl> NA, NA, 10358, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ fips <dbl> 2, 1, 5, 60, 4, 6, 8, 9, 11, 10, 12, 13...
## $ positiveIncrease <dbl> 85, 971, 549, 0, 723, 5920, 270, 118, 5...
## $ negativeIncrease <dbl> 1713, 10462, 6680, 0, 6481, 81363, 4657...
## $ total <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ totalTestResults <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ totalTestResultsIncrease <dbl> 1798, 11433, 7229, 0, 7204, 87283, 7348...
## $ posNeg <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ deathIncrease <dbl> 0, 30, 10, 0, 50, 163, 12, 1, 1, 0, 119...
## $ hospitalizedIncrease <dbl> 0, 250, 47, 0, 123, 0, 3, 72, 0, 0, 450...
## $ hash <chr> "c83a1d575a597788adccbe170950b8d197754b...
## $ commercialScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeRegularScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ positiveScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ score <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ grade <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
# Check uniqueness by date-state; anyDuplicated() returns 0 when no row repeats
cvData %>%
select(date, state) %>%
anyDuplicated()
## [1] 0
As expected, the file is unique by date and state. The date field has been converted from double to date. The main columns of interest will be: date, state, positiveIncrease (daily new cases), and deathIncrease (daily new deaths).
A smaller frame containing only this data is created:
# Keep only the analysis columns, renaming the daily increases to cases/deaths
cvUse <- cvData %>%
select(date, state, cases=positiveIncrease, deaths=deathIncrease)
# Control totals for cases and deaths across all rows (for external validation)
cvUse %>%
summarize_if(is.numeric, sum)
## # A tibble: 1 x 2
## cases deaths
## <dbl> <dbl>
## 1 5546056 166127
The numeric totals match those reported by the COVID Tracking Project for the same date. They are roughly 5% lower than the totals reported by worldometers.info. There are significant issues associated with official reporting for coronavirus, and a 5% discrepancy between sources is not unexpected.
The data are next checked for totals by state and by week:
# Total cases and deaths by state, bars ordered by magnitude
cvUse %>%
group_by(state) %>%
summarize_if(is.numeric, sum) %>%
pivot_longer(-state) %>%
ggplot(aes(x=fct_reorder(state, value), y=value)) +
geom_col(fill="lightblue") +
coord_flip() +
facet_wrap(~name, scales="free_x") +
labs(x="", y="", title="Coronavirus cases and deaths by state through August 20, 2020")
# National totals by epidemiological week; the final week is partial (5 of 7 days)
cvUse %>%
group_by(week=lubridate::epiweek(date)) %>%
summarize_if(is.numeric, sum) %>%
pivot_longer(-week) %>%
ggplot(aes(x=week, y=value)) +
geom_line() +
facet_wrap(~name, scales="free_y") +
labs(x="", y="", title="Coronavirus cases and deaths by epidemiological week through August 20, 2020")
Sort order by state generally matches published reports of coronavirus burden by state. The weekly data appear broadly aligned with other published data. The dip in the final week is due to only 5 of the 7 days of the week being included in the Thursday data file.
State population data (2015 estimates) are obtained from usmap for converting metrics to per capita, and the cvData file is filtered to only those observations contained in the state population file:
# State abbreviation, full name, and 2015 population from the usmap package
statePop <- usmap::statepop %>%
select(state=abbr, name=full, pop_2015)
glimpse(statePop)
## Observations: 51
## Variables: 3
## $ state <chr> "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "DC", "FL"...
## $ name <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "California", ...
## $ pop_2015 <dbl> 4858979, 738432, 6828065, 2978204, 39144818, 5456574, 3590...
# Keep only rows whose state appears in the population file (drops territories)
cvUse <- cvUse %>%
semi_join(statePop)
## Joining, by = "state"
# Control totals after the filter, for comparison with the pre-filter totals
cvUse %>%
summarize_if(is.numeric, sum)
## # A tibble: 1 x 2
## cases deaths
## <dbl> <dbl>
## 1 5516295 165742
Over 99% of the cases and deaths are preserved when the territories and other non-state data are removed.
There appear to be at least two peaks in the case and death data, likely driven by different locales experiencing outbreaks at different times. Per capita cases and deaths by state are plotted:
# Per capita metrics by state
# Total cases/deaths per state, joined to population, rescaled to per-million
# (note: 'cases' and 'deaths' are overwritten with their per-million versions)
cvStatePerCapita <- cvUse %>%
group_by(state) %>%
summarize_if(is.numeric, sum) %>%
inner_join(statePop) %>%
mutate(cases=1000000*cases/pop_2015, deaths=1000000*deaths/pop_2015)
## Joining, by = "state"
# Disease burden by state, per capita
# Scatter of deaths vs cases per million, each point labeled by state code
cvStatePerCapita %>%
ggplot(aes(x=cases, y=deaths)) +
geom_text(aes(label=state)) +
labs(x="Cases per million through August 20", y="Deaths per million through August 20")
States will be defined as having a low impact of disease if 1) deaths per million are 100 or less, and 2) cases per million are 10000 or less:
# States meeting both low-burden thresholds (<= 100 dpm AND <= 10000 cpm)
lowBurden <- cvStatePerCapita %>%
filter(deaths <= 100, cases <= 10000) %>%
pull(state)
Next, the states that are not defined as low burden are hierarchically clustered, using total deaths per capita by month. Due to very significant expansions in testing volume both by state and within state over time, death data is likely more representative of disease burden by time than cases data. Deaths per capita by state by month are capped at 300 since otherwise the distance between the extremely high states (which is not so meaningful here) dominates the differences in early vs. late disease burden:
# Calculate the raw data
# Monthly deaths/cases per million for the non-low-burden states, widened to
# one row per state with columns dpm_<month> and cpm_<month>
clustData <- cvUse %>%
filter(!(state %in% lowBurden)) %>%
inner_join(statePop) %>%
mutate(month=lubridate::month(date), cpm=1000000*cases/pop_2015, dpm=1000000*deaths/pop_2015) %>%
filter(date >= as.Date("2020-03-15")) %>%
group_by(state, month) %>%
summarize(dpm=sum(dpm), cpm=sum(cpm), n=n()) %>%
# 'state' here is passed positionally as pivot_wider's id_cols argument
pivot_wider(state, names_from=month, values_from=c(dpm, cpm)) %>%
ungroup()
## Joining, by = "state"
# Run clusters without normalization, but with dpm limited to 300
# The 300 cap keeps extreme states from dominating the distance matrix
distData <- clustData %>%
select(state, starts_with("dpm")) %>%
mutate_if(is.numeric, .funs=~pmin(., 300)) %>%
column_to_rownames("state")
# Hierarchical clustering on Euclidean distances between monthly dpm profiles
cvTree <- hclust(dist(distData))
# Plot the dendrogram
plot(cvTree)
There appears to be a cluster of states that had early outbreaks, a cluster of states that had later outbreaks, and a large segment that falls in between these extremes. Suppose the dendrogram is split into three clusters, with the low burden states added as a fourth cluster:
# Get the clusters from the tree, adding the low burden states as cluster 4
cvClusters <- c(cutree(cvTree, k=3),
rep(4, length(lowBurden)) %>% set_names(lowBurden)
)
# Add the clusters to the population data file
# (cvClusters is a named vector, so indexing by state abbreviation works)
statePop <- statePop %>%
mutate(cluster=factor(cvClusters[state]))
# Show a map of the clusters
usmap::plot_usmap(regions="states", data=statePop, values="cluster")
# Show population totals by cluster
statePop %>%
group_by(cluster) %>%
summarize(pop_2015=sum(pop_2015)/1000000) %>%
ggplot(aes(x=fct_reorder(cluster, pop_2015), y=pop_2015)) +
geom_col(fill="lightblue") +
geom_text(aes(y=pop_2015/2, label=round(pop_2015))) +
labs(y="2015 population (millions)", x="Cluster", title="Population by cluster (millions)") +
coord_flip()
# Virus by week by cluster
cvUse %>%
mutate(cluster=factor(cvClusters[state]), week=lubridate::epiweek(date)) %>%
group_by(cluster, week) %>%
summarize(cases=sum(cases), deaths=sum(deaths)) %>%
pivot_longer(-c(week, cluster)) %>%
ggplot(aes(x=week, y=value, group=cluster, color=cluster)) +
geom_line() +
facet_wrap(~name, scales="free_y")
Metrics can be normalized by population to look at coronavirus burden per capita by segment over time:
# Integrated data file
# cvUse plus epiweek, population, and cluster columns (via statePop)
cvWeekPop <- cvUse %>%
mutate(week=lubridate::epiweek(date)) %>%
inner_join(statePop, by="state")
# Summarized by date-cluster
# Daily cluster totals with centered 7-day rolling means, scaled per million
cvDateCluster <- cvWeekPop %>%
group_by(date, cluster) %>%
summarize(cases=sum(cases), deaths=sum(deaths)) %>%
inner_join(statePop %>% group_by(cluster) %>% summarize(pop_mill=sum(pop_2015)/1000000), by="cluster") %>%
group_by(cluster) %>%
mutate(cpm7=zoo::rollmean(cases, 7, fill=NA)/pop_mill,
dpm7=zoo::rollmean(deaths, 7, fill=NA)/pop_mill
) %>%
ungroup()
# Plotted by date
# Rolling means are NA at each series' edges (fill=NA), so those rows drop out
cvDateCluster %>%
select(date, cluster, cases=cpm7, deaths=dpm7) %>%
pivot_longer(-c(date, cluster)) %>%
filter(!is.na(value)) %>%
ggplot(aes(x=date, y=value, group=cluster, color=cluster)) +
geom_line() +
facet_wrap(~name, scales="free_y") +
labs(x="", y="Rolling 7-day mean, per million", title="Rolling 7-day mean disease burden, per million")
Broadly speaking:
There appear to be meaningful differences in disease burden over time, and with a meaningful geographical explanatory component.
Next, the total volume of disease through August 20 is explored by state:
# Lookup table mapping short metric names used in the code to the descriptive
# labels shown on plot axes and facet strips
varMapper <- setNames(
  c("Cases through Aug 20",
    "Increase in cases, 30 days through Aug 20",
    "Rolling 7-day mean cases, through Aug 20",
    "Deaths through Aug 20",
    "Increase in deaths, 30 days through Aug 20",
    "Rolling 7-day mean deaths, through Aug 20",
    "Cases through Aug 20 (per million)",
    "Cases per day (7-day rolling mean) through Aug 20 (per million)",
    "Increase in cases, 30 days through Aug 20 (per million)",
    "Deaths through Aug 20 (per million)",
    "Deaths per day (7-day rolling mean) through Aug 20 (per million)",
    "Increase in deaths, 30 days through Aug 20 (per million)",
    "Currently Hospitalized per million (7-day rolling mean)"),
  c("cases", "newCases", "casesroll7",
    "deaths", "newDeaths", "deathsroll7",
    "cpm", "cpm7", "newcpm",
    "dpm", "dpm7", "newdpm",
    "hpm7")
)
# Coronavirus impact by state: absolute cases and deaths, colored by cluster
# (the original computed cpm/dpm here as well, then immediately dropped them
# with select() -- that dead mutate and the unused pop_2015 sum are removed)
cvWeekPop %>%
  group_by(state, cluster) %>%
  summarize(cases=sum(cases), deaths=sum(deaths)) %>%
  ungroup() %>%
  pivot_longer(c(cases, deaths)) %>%
  ggplot(aes(x=fct_reorder(state, value, .fun=min), y=value)) +
  geom_col(aes(fill=cluster)) +
  coord_flip() +
  labs(x="", y="", title="Coronavirus impact by state through August 20, 2020") +
  facet_wrap(~varMapper[name], scales="free_x")
# The same plot on a per-million basis (cpm/dpm)
cvWeekPop %>%
  group_by(state, cluster) %>%
  summarize(cases=sum(cases), deaths=sum(deaths), pop_2015=mean(pop_2015)) %>%
  ungroup() %>%
  mutate(cpm=1000000*cases/pop_2015, dpm=1000000*deaths/pop_2015) %>%
  select(state, cluster, cpm, dpm) %>%
  pivot_longer(c(cpm, dpm)) %>%
  ggplot(aes(x=fct_reorder(state, value, .fun=min), y=value)) +
  geom_col(aes(fill=cluster)) +
  coord_flip() +
  labs(x="", y="", title="Coronavirus impact by state through August 20, 2020") +
  facet_wrap(~varMapper[name], scales="free_x")
As expected, the segmentation approach has largely divided the states by total coronavirus burden. Mississippi and Arizona are in segment 1 due to the late nature of their outbreak.
Further, the data are explored for a combination of total disease burden and change over the past 30 days:
# Total cases per million (x) vs cases per million added in the last 30 days
# (y) by state; the dashed line marks 50% of all cases arriving in the last
# 30 days. newCases zeroes out any daily counts older than 30 days pre-Aug 21.
cvWeekPop %>%
mutate(newCases=ifelse(as.Date("2020-08-21")-date <= 30, cases, 0),
newDeaths=ifelse(as.Date("2020-08-21")-date <= 30, deaths, 0)
) %>%
group_by(state, cluster) %>%
summarize(cases=sum(cases),
deaths=sum(deaths),
newCases=sum(newCases),
newDeaths=sum(newDeaths),
pop_2015=mean(pop_2015)
) %>%
ungroup() %>%
mutate(cpm=1000000*cases/pop_2015,
dpm=1000000*deaths/pop_2015,
newcpm=1000000*newCases/pop_2015,
newdpm=1000000*newDeaths/pop_2015
) %>%
ggplot(aes(x=cpm, y=newcpm)) +
geom_text(aes(color=cluster, label=state)) +
labs(x=varMapper["cpm"],
y=varMapper["newcpm"],
title="Coronavirus impact by state through August 20, 2020"
) +
geom_abline(lty=2, slope=c(0.5)) +
lims(x=c(0, NA), y=c(0, NA)) +
annotate("text", x=18000, y=11000, label="50% of total cases\nin last 30 days", hjust=1) +
annotate("segment", x=18500, y=10500, xend=20000, yend=10000, arrow=arrow(), lty=2)
# Same construction as the cases plot above, but for deaths per million:
# total dpm (x) vs dpm added in the last 30 days (y), dashed 50% reference line
cvWeekPop %>%
mutate(newCases=ifelse(as.Date("2020-08-21")-date <= 30, cases, 0),
newDeaths=ifelse(as.Date("2020-08-21")-date <= 30, deaths, 0)
) %>%
group_by(state, cluster) %>%
summarize(cases=sum(cases),
deaths=sum(deaths),
newCases=sum(newCases),
newDeaths=sum(newDeaths),
pop_2015=mean(pop_2015)
) %>%
ungroup() %>%
mutate(cpm=1000000*cases/pop_2015,
dpm=1000000*deaths/pop_2015,
newcpm=1000000*newCases/pop_2015,
newdpm=1000000*newDeaths/pop_2015
) %>%
ggplot(aes(x=dpm, y=newdpm)) +
geom_text(aes(color=cluster, label=state)) +
labs(x=varMapper["dpm"],
y=varMapper["newdpm"],
title="Coronavirus impact by state through August 20, 2020"
) +
geom_abline(lty=2, slope=c(0.5)) +
lims(x=c(0, NA), y=c(0, NA)) +
annotate("text", x=250, y=200, label="50% of total deaths\nin last 30 days", hjust=1) +
annotate("segment", x=250, y=200, xend=400, yend=200, arrow=arrow(), lty=2)
The clusters appear relatively well separated, with the possible exception of Louisiana which is arguably quite close to cluster 1. Cluster 3 stands out as having had a very high overall impact, but with not much of an increase in the past 30 days.
The individual trends by state are also plotted, smoothed by week:
# Per-state case trends overlaid with each cluster's aggregate trend.
# rbind(mutate(., state="cluster")) duplicates every row under the pseudo-state
# "cluster", so the grouped sums produce cluster totals (with pop_2015 summed
# across the member states) alongside the individual state series.
cvWeekPop %>%
rbind(mutate(., state="cluster")) %>%
group_by(state, cluster, date) %>%
summarize_if(is.numeric, sum) %>%
ungroup() %>%
mutate(cpm=1000000*cases/pop_2015, dpm=1000000*deaths/pop_2015) %>%
group_by(state, cluster) %>%
mutate(cpm7=zoo::rollmean(cpm, k=7, fill=NA), dpm7=zoo::rollmean(dpm, k=7, fill=NA)) %>%
ungroup() %>%
filter(!is.na(cpm7)) %>%
ggplot(aes(x=date, y=cpm7)) +
geom_line(data=~filter(., state != "cluster"), aes(group=state), alpha=0.25) +
geom_line(data=~filter(., state == "cluster"), aes(group=state, color=cluster), lwd=1.5) +
facet_wrap(~cluster, scales="free_y") +
labs(x="",
y=varMapper["cpm"],
title="Cases per million per day (rolling 7-day mean) by state and cluster",
subtitle="Caution that each facet has its own y axis with different scales"
) +
ylim(c(0, NA))
# Same construction as the cases trend plot above, but tracking dpm7 (deaths
# per million, 7-day rolling mean) instead of cpm7
cvWeekPop %>%
rbind(mutate(., state="cluster")) %>%
group_by(state, cluster, date) %>%
summarize_if(is.numeric, sum) %>%
ungroup() %>%
mutate(cpm=1000000*cases/pop_2015, dpm=1000000*deaths/pop_2015) %>%
group_by(state, cluster) %>%
mutate(cpm7=zoo::rollmean(cpm, k=7, fill=NA), dpm7=zoo::rollmean(dpm, k=7, fill=NA)) %>%
ungroup() %>%
filter(!is.na(dpm7)) %>%
ggplot(aes(x=date, y=dpm7)) +
geom_line(data=~filter(., state != "cluster"), aes(group=state), alpha=0.25) +
geom_line(data=~filter(., state == "cluster"), aes(group=state, color=cluster), lwd=1.5) +
facet_wrap(~cluster, scales="free_y") +
labs(x="",
y=varMapper["dpm"],
title="Deaths per million per day (rolling 7-day mean) by state and cluster",
subtitle="Caution that each facet has its own y axis with different scales"
) +
ylim(c(0, NA))
With a few exceptions in a rather noisy segment 2 (as well as Louisiana in segment 3), states seem to broadly follow the disease state pattern for their cluster, though with some differences in magnitude and timing.
The process is converted to functional form so that it can be run using different data. First, a function is written to read in the data:
# Function to read in the raw coronavirus data file (assume it is already downloaded)
readCVData <- function(fileName,
                       showGlimpse=TRUE,
                       uqVars=c("date", "state"),
                       errDups=TRUE
                       ) {
  # FUNCTION ARGUMENTS
  # fileName: location of the downloaded CSV file from COVID Tracking Project
  # showGlimpse: boolean, whether to run glimpse() on the file
  # uqVars: variables that the file is expected to be unique by
  # errDups: boolean, whether to error out if uniqueness is violated

  # Load the CSV, then turn the yyyymmdd-style double 'date' into a Date
  cvData <- readr::read_csv(fileName)
  cvData <- mutate(cvData, date=lubridate::ymd(date))
  # Optionally preview the structure of the data
  if (showGlimpse) {
    glimpse(cvData)
  }
  # Count duplicate rows over the expected unique key
  nDups <- anyDuplicated(cvData[uqVars])
  # Report the outcome of the uniqueness check (error out only if requested)
  if (nDups != 0) {
    cat("\nUniqueness check failed, file has duplicates by:", uqVars, "\n")
    if (errDups) stop("Fix and re-run")
  } else {
    cat("\nFile is unique by:", uqVars, "and has dimensions:", dim(cvData), "\n")
  }
  # Return the file
  cvData
}
# Re-read the August 20 file through the new function (glimpse + uniqueness check)
cvFull <- readCVData("./RInputFiles/Coronavirus/CV_downloaded_200820.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## state = col_character(),
## dataQualityGrade = col_character(),
## lastUpdateEt = col_character(),
## dateModified = col_datetime(format = ""),
## checkTimeEt = col_character(),
## dateChecked = col_datetime(format = ""),
## hash = col_character(),
## grade = col_logical()
## )
## See spec(...) for full column specifications.
## Observations: 9,449
## Variables: 53
## $ date <date> 2020-08-20, 2020-08-20, 2020-08-20, 20...
## $ state <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO...
## $ positive <dbl> 5332, 112449, 54765, 0, 196280, 644751,...
## $ negative <dbl> 307315, 784330, 593744, 1514, 922163, 9...
## $ pending <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ hospitalizedCurrently <dbl> 51, 1105, 499, NA, 1070, 6212, 238, 47,...
## $ hospitalizedCumulative <dbl> NA, 13330, 3790, NA, 21143, NA, 6784, 1...
## $ inIcuCurrently <dbl> NA, NA, NA, NA, 388, 1707, NA, NA, 26, ...
## $ inIcuCumulative <dbl> NA, 1348, NA, NA, NA, NA, NA, NA, NA, N...
## $ onVentilatorCurrently <dbl> 6, NA, 108, NA, 233, NA, NA, NA, 12, NA...
## $ onVentilatorCumulative <dbl> NA, 734, 488, NA, NA, NA, NA, NA, NA, N...
## $ recovered <dbl> 1513, 44684, 48458, NA, 28471, NA, 5759...
## $ dataQualityGrade <chr> "A", "B", "A", "C", "A+", "B", "A", "B"...
## $ lastUpdateEt <chr> "8/20/2020 0:00", "8/20/2020 11:00", "8...
## $ dateModified <dttm> 2020-08-20 00:00:00, 2020-08-20 11:00:...
## $ checkTimeEt <chr> "8/19/2020 20:00", "8/20/2020 7:00", "8...
## $ death <dbl> 29, 1974, 641, 0, 4684, 11686, 1800, 44...
## $ hospitalized <dbl> NA, 13330, 3790, NA, 21143, NA, 6784, 1...
## $ dateChecked <dttm> 2020-08-20 00:00:00, 2020-08-20 11:00:...
## $ totalTestsViral <dbl> 312647, 891813, 648509, NA, 1116897, 10...
## $ positiveTestsViral <dbl> 4970, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ negativeTestsViral <dbl> 307360, NA, 593744, NA, NA, NA, NA, NA,...
## $ positiveCasesViral <dbl> 5332, 107483, 54765, 0, 194734, 644751,...
## $ deathConfirmed <dbl> 29, 1905, NA, NA, 4429, NA, NA, 3572, N...
## $ deathProbable <dbl> NA, 69, NA, NA, 255, NA, NA, 886, NA, 6...
## $ totalTestEncountersViral <dbl> NA, NA, NA, NA, NA, NA, 895207, NA, NA,...
## $ totalTestsPeopleViral <dbl> NA, NA, NA, 1514, NA, NA, 645170, NA, 2...
## $ totalTestsAntibody <dbl> NA, NA, NA, NA, 255456, NA, 150931, NA,...
## $ positiveTestsAntibody <dbl> NA, NA, NA, NA, NA, NA, 10406, NA, NA, ...
## $ negativeTestsAntibody <dbl> NA, NA, NA, NA, NA, NA, 140525, NA, NA,...
## $ totalTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ negativeTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsPeopleAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsAntigen <dbl> NA, NA, 10358, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ fips <dbl> 2, 1, 5, 60, 4, 6, 8, 9, 11, 10, 12, 13...
## $ positiveIncrease <dbl> 85, 971, 549, 0, 723, 5920, 270, 118, 5...
## $ negativeIncrease <dbl> 1713, 10462, 6680, 0, 6481, 81363, 4657...
## $ total <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ totalTestResults <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ totalTestResultsIncrease <dbl> 1798, 11433, 7229, 0, 7204, 87283, 7348...
## $ posNeg <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ deathIncrease <dbl> 0, 30, 10, 0, 50, 163, 12, 1, 1, 0, 119...
## $ hospitalizedIncrease <dbl> 0, 250, 47, 0, 123, 0, 3, 72, 0, 0, 450...
## $ hash <chr> "c83a1d575a597788adccbe170950b8d197754b...
## $ commercialScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeRegularScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ positiveScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ score <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ grade <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
##
## File is unique by: date state and has dimensions: 9449 53
Next, a function selects only the key variables of interest, filters to include only states (plus DC), and reports on relevant control totals:
# Function to select relevant variables and observations, and report on control totals
processCVData <- function(dfFull,
                          varsKeep=c("date", "state", "positiveIncrease", "deathIncrease"),
                          varsRename=c("positiveIncrease"="cases", "deathIncrease"="deaths"),
                          stateList=c(state.abb, "DC")
                          ) {
  # FUNCTION ARGUMENTS
  # dfFull: the full data file originally loaded
  # varsKeep: variables to keep from the full file
  # varsRename: variables to be renamed, using a named vector of form originalName=newName
  # stateList: variables for filtering state (NULL means do not run any filters)

  # Keep only the requested columns
  df <- dfFull %>%
    select_at(vars(all_of(varsKeep)))
  # Swap in the new name wherever a rename is defined for a current column
  newNames <- varsRename[names(df)]
  names(df) <- ifelse(is.na(newNames), names(df), newNames)
  # Flag each record as a valid state (all valid when no state list is given)
  if (is.null(stateList)) {
    df <- mutate(df, validState=TRUE)
  } else {
    df <- mutate(df, validState=state %in% stateList)
  }
  # Print control totals split by the validity flag (n counts rows)
  cat("\n\nControl totals - note that validState other than TRUE will be discarded\n\n")
  df %>%
    mutate(n=1) %>%
    group_by(validState) %>%
    summarize_if(is.numeric, sum) %>%
    print()
  # Keep only valid-state rows and drop the helper flag before returning
  df %>%
    filter(validState) %>%
    select(-validState)
}
# Filter to the 50 states plus DC and rename to cases/deaths
cvFiltered <- processCVData(cvFull)
##
##
## Control totals - note that validState other than TRUE will be discarded
##
## # A tibble: 2 x 4
## validState cases deaths n
## <lgl> <dbl> <dbl> <dbl>
## 1 FALSE 29761 385 790
## 2 TRUE 5516295 165742 8659
Next, state population data are processed for future use:
# Function to extract and format key state data
getStateData <- function(df=usmap::statepop,
                         renameVars=c("abbr"="state", "full"="name", "pop_2015"="pop"),
                         keepVars=c("state", "name", "pop")
                         ) {
  # FUNCTION ARGUMENTS:
  # df: the data frame containing state data
  # renameVars: variables to be renamed, using named list with format "originalName"="newName"
  # keepVars: variables to be kept in the final file

  # Apply any renames that match the current column names
  mapped <- renameVars[names(df)]
  names(df) <- ifelse(is.na(mapped), names(df), mapped)
  # Return the file with only the requested columns kept
  df %>%
    select_at(vars(all_of(keepVars)))
}
# Build the state/name/pop reference table with the default arguments
stateData <- getStateData()
Next, helper functions are written to convert a variable to per capita, or to convert a variable to a “rolling” mean:
# Helper function to create per capita metrics
helperPerCapita <- function(df,
                            origVar,
                            newName,
                            byVar="state",
                            popVar="pop",
                            popData=stateData,
                            mult=1000000
                            ) {
  # FUNCTION ARGUMENTS:
  # df: the data frame currently being processed
  # origVar: the variable to be converted to per capita
  # newName: the new per capita variable name
  # byVar: the variable that will be merged by
  # popVar: the name of the population variable in the popData file
  # popData: the file containing the population data
  # mult: the multiplier, so that the metric is "per mult people"

  # Pull just the merge key and the population column from the reference file
  popLookup <- select_at(popData, vars(all_of(c(byVar, popVar))))
  # Join the population on, scale the metric, then drop the population column
  # (get() resolves origVar/popVar to columns inside the mutate data mask)
  df %>%
    inner_join(popLookup, by=byVar) %>%
    mutate(!!newName:=mult*get(origVar)/get(popVar)) %>%
    select(-all_of(popVar))
}
# Helper function to create rolling aggregates
helperRollingAgg <- function(df,
origVar,
newName,
func=zoo::rollmean,
k=7,
fill=NA,
...
) {
# FUNCTION ARGUMENTS:
# df: the data frame containing the data
# origVar: the original data column name
# newName: the new variable column name
# func: the function to be applied (zoo::rollmean will be by far the most common)
# k: the periodicity (k=7 is rolling weekly data)
# fill: how to fill leading/trailing data to maintain the same vector lengths
# ...: any other arguments to be passed to func
# Create the appropriate variable
# get(origVar) resolves to the column inside the mutate data mask; if df is
# grouped, the rolling function is applied within each group separately
df %>%
mutate(!!newName:=func(get(origVar), k=k, fill=fill, ...))
}
# Function to add per capita and rolling to the base data frame
helperMakePerCapita <- function(df,
                                k=7
                                ) {
  # FUNCTION ARGUMENTS:
  # df: the initial data frame for conversion
  # k: the rolling time period to use

  # Per capita cases (cpm) and deaths (dpm), then within-state chronological
  # rolling means named cpm<k> and dpm<k> (e.g. cpm7/dpm7 for k=7)
  df %>%
    helperPerCapita(origVar="cases", newName="cpm") %>%
    helperPerCapita(origVar="deaths", newName="dpm") %>%
    group_by(state) %>%
    arrange(date) %>%
    helperRollingAgg(origVar="cpm", newName=paste0("cpm", k), k=k) %>%
    helperRollingAgg(origVar="dpm", newName=paste0("dpm", k), k=k) %>%
    ungroup()
}
# Create the variables for cpm, dpm, cpm7, and dpm7 (7-day rolling means) and show the result
cvFilteredPerCapita <- helperMakePerCapita(cvFiltered, k=7)
cvFilteredPerCapita
## # A tibble: 8,659 x 8
## date state cases deaths cpm dpm cpm7 dpm7
## <date> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2020-01-22 WA 0 0 0 0 NA NA
## 2 2020-01-23 WA 0 0 0 0 NA NA
## 3 2020-01-24 WA 0 0 0 0 NA NA
## 4 2020-01-25 WA 0 0 0 0 0 0
## 5 2020-01-26 WA 0 0 0 0 0.0199 0
## 6 2020-01-27 WA 0 0 0 0 0.0199 0
## 7 2020-01-28 WA 0 0 0 0 0.0199 0
## 8 2020-01-29 WA 1 0 0.139 0 0.0398 0
## 9 2020-01-30 WA 0 0 0 0 0.0797 0
## 10 2020-01-31 WA 0 0 0 0 0.0996 0
## # ... with 8,649 more rows
Next, a function is written for creating side-by-side cases and death bar plots:
# Function to create side-by-side plots for a deaths and cases metric
# Data in df will be aggregated to be unique by byVar using aggFunc
helperBarDeathsCases <- function(df,
                                 numVars,
                                 title="",
                                 xVar="state",
                                 fillVar=NULL,
                                 aggFunc=sum,
                                 mapper=varMapper
                                 ) {
  # FUNCTION ARGUMENTS:
  # df: the data frame containing the data
  # numVars: the relevant numeric variables for plotting
  # title: plot title, default is nothing
  # xVar: the x-axis variable for plotting
  # fillVar: the variable that will color the bars in the final plot (NULL means use "lightblue" for all)
  # aggFunc: the aggregate function (will be applied to create data unique by byVar)
  # mapper: mapping file to convert x/y variables to descriptive axes (named vector "variable"="label")
  # OVERALL FUNCTION PROCESS:
  # 1. Variables in numVar are aggregated by aggFunc to be unique by c(xVar, fillVar)
  # 2. Data are pivoted longer
  # 3. Bar charts are created, with coloring by fillVar if provided
  # Create the byVar for summing
  byVar <- xVar
  if (!is.null(fillVar)) { byVar <- c(byVar, fillVar) }
  # Choose the bar geom up front: this avoids the fragile `+ if () else` construct
  # at the tail of a ggplot chain, and uses the .data pronoun rather than the
  # soft-deprecated aes_string()
  geomBars <- if (is.null(fillVar)) {
    geom_col(fill="lightblue")
  } else {
    geom_col(aes(fill=.data[[fillVar]]))
  }
  # Process the data and create the plot
  p1 <- df %>%
    select_at(vars(all_of(c(byVar, numVars)))) %>%
    group_by_at(vars(all_of(byVar))) %>%
    summarize_all(aggFunc) %>%
    pivot_longer(-all_of(byVar)) %>%
    ggplot(aes(x=fct_reorder(.data[[xVar]], value, .fun=min), y=value)) +
    geomBars +
    coord_flip() +
    facet_wrap(~mapper[name], scales="free_x") +
    labs(x="", y="", title=title)
  # Title the legend with the fill variable name (matches the aes_string() behavior)
  if (!is.null(fillVar)) p1 <- p1 + labs(fill=fillVar)
  # Print the plot
  print(p1)
}
A function is written to assess the raw state-level totals:
# Function to assess state data (no segments created yet)
assessStateData <- function(df,
                            titleStem="Coronavirus burden by state",
                            cfrEst=0.005,
                            mapper=varMapper
                            ) {
  # FUNCTION ARGUMENTS:
  # df: the data frame containing the state-level data (by state-date, with cases/deaths/cpm/dpm)
  # titleStem: the main title description, with (total) or (per capita) appended
  # cfrEst: the estimated case fatality rate (CFR); a dashed abline will be plotted at this slope
  # mapper: mapping file to convert x/y variables to descriptive axes (named vector "variable"="label")
  # Plot cases and deaths by state, once for overall and once per capita
  helperBarDeathsCases(df, numVars=c("deaths", "cases"), title=paste0(titleStem, " (total)"))
  helperBarDeathsCases(df, numVars=c("dpm", "cpm"), title=paste0(titleStem, " (per capita)"))
  # Disease burden by state, per capita (states labeled by abbreviation)
  p1 <- df %>%
    group_by(state) %>%
    summarize(cpm=sum(cpm), dpm=sum(dpm)) %>%
    ggplot(aes(x=cpm, y=dpm)) +
    geom_text(aes(label=state)) +
    labs(x=mapper["cpm"],
         y=mapper["dpm"],
         title="Deaths vs. cases by state (per million people)",
         subtitle=paste0("Dashed line is a CFR of ",
                         round(100*cfrEst, 1),
                         "% (states far from this may have case counting issues)"
                         )
         ) +
    geom_abline(slope=cfrEst, lty=2)
  print(p1)
  # Total disease burden nationally by day, not using functional form
  p2 <- df %>%
    select(date, cases, deaths) %>%
    group_by(date) %>%
    summarize_if(is.numeric, sum) %>%
    ungroup() %>%
    helperRollingAgg(origVar="cases", newName="casesroll7") %>%
    helperRollingAgg(origVar="deaths", newName="deathsroll7") %>%
    select(-cases, -deaths) %>%
    pivot_longer(-date) %>%
    filter(!is.na(value)) %>%
    ggplot(aes(x=date, y=value)) +
    geom_line() +
    # BUG FIX: use the mapper argument here (previously hard-coded the global
    # varMapper, silently ignoring a user-supplied mapper)
    facet_wrap(~mapper[name], scales="free_y") +
    labs(x="",
         y="",
         title=titleStem
         )
  print(p2)
}
# State-level assessments (prints the total, per capita, CFR, and national trend plots)
assessStateData(cvFilteredPerCapita)
Next, functions for creating and assessing clusters are created. The approach can use either hierarchical clustering or k-means, and focus on the following variables:
Cases is a tricky clustering variable since detection rates were in the single-digit percentages early in the outbreak (estimates of ~50x as many infected as diagnosed). As testing volumes increased, it is likely that a greater percentage of cases are diagnosed. States that have later outbreaks appear to have many more cases per capita but with a lower death rate per capita:
# Function to create an elbow plot for various numbers of clusters in the data
helperElbow <- function(mtx,
                        testCenters,
                        iter.max,
                        nstart,
                        silhouette=FALSE
                        ) {
  # FUNCTION ARGUMENTS:
  # mtx: a numeric matrix, or an object that can be coerced to a numeric matrix (no character fields)
  # testCenters: integer vector for the centers to be tested
  # iter.max: parameter passed to kmeans
  # nstart: parameter passed to kmeans
  # silhouette: whether to calculate the silhouette score
  # Create objects for storing tot.withinss and silhouetteScore
  totWithin <- vector("numeric", length(testCenters))
  silhouetteScore <- vector("numeric", length(testCenters))
  # Create the distancing data (required for silhouette score)
  if (silhouette) distData <- dist(mtx)
  # Run k-means for every value in testCenters, and store $tot.withinss (and silhouetteScore, if requested)
  for (n in seq_along(testCenters)) {
    k <- testCenters[n]
    km <- kmeans(mtx, centers=k, iter.max=iter.max, nstart=nstart)
    totWithin[n] <- km$tot.withinss
    # Silhouette is undefined for a single cluster, so it is left at 0 when k == 1.
    # && (not the vectorized &) is the correct operator for this scalar condition,
    # and it short-circuits so cluster::silhouette/distData are never touched when
    # silhouette is FALSE.
    if (silhouette && (k > 1)) silhouetteScore[n] <- mean(cluster::silhouette(km$cluster, distData)[, 3])
  }
  # Create the elbow plot
  p1 <- tibble::tibble(n=testCenters, wss=totWithin) %>%
    ggplot(aes(x=n, y=wss)) +
    geom_point() +
    geom_line() +
    geom_text(aes(y=wss + 0.05*max(totWithin), x=n+0.2, label=round(wss, 1))) +
    labs(x="Number of segments", y="Total Within Sum-Squares", title="Elbow plot") +
    ylim(c(0, NA))
  # Create the silhouette plot if requested
  if (silhouette) {
    p2 <- tibble::tibble(n=testCenters, ss=silhouetteScore) %>%
      ggplot(aes(x=n, y=ss)) +
      geom_point() +
      geom_line() +
      geom_text(aes(y=ss + 0.05*max(silhouetteScore), x=n+0.2, label=round(ss, 1))) +
      labs(x="Number of segments", y="Mean silhouette width", title="Silhouette plot") +
      ylim(c(-1, NA))
    gridExtra::grid.arrange(p1, p2, nrow=1)
  } else {
    print(p1)
  }
}
# Function to create clusters for the state data (requires all data from same year, as currently true)
clusterStates <- function(df,
                          caseVar="cpm",
                          deathVar="dpm",
                          shapeFunc=lubridate::month,
                          minShape=NULL,
                          minDeath=0,
                          minCase=0,
                          ratioTotalvsShape=1,
                          ratioDeathvsCase=1,
                          hierarchical=TRUE,
                          hierMethod="complete",
                          nCenters=3,
                          iter.max=10,
                          nstart=1,
                          testCenters=NULL,
                          returnList=FALSE,
                          seed=NULL
                          ) {
  # FUNCTION ARGUMENTS:
  # df: the data frame containing cases and deaths data
  # caseVar: the variable containing the cases per capita data
  # deathVar: the variable containing the deaths per capita data
  # shapeFunc: the function to be used for creating the shape of the curve
  #            (month-of-year by default, which is why all data must come from the same year)
  # minShape: the minimum value to be used for shape (to avoid very small amounts of data in Jan/Feb)
  #           NULL means keep everything
  # minDeath: use this value as a floor for the death metric when calculating shape
  # minCase: use this metric as a floor for the case metric when calculating shape
  # ratioTotalvsShape: amount of standard deviation to be kept in total variable vs shape variables
  # ratioDeathvsCase: amount of standard deviation to be kept in deaths vs cases
  #                   (total death data will be scaled to have sd this many times higher than cases)
  #                   (death percentages by time period will be scaled directly by this amount)
  # hierarchical: boolean, if TRUE run hierarchical clustering, otherwise run k-means clustering
  #               (both methods are implemented below)
  # hierMethod: the method for hierarchical clustering (e.g., 'complete' or 'single')
  # nCenters: the number of centers to use for kmeans clustering
  # testCenters: integer vector of centers to test (will create an elbow plot); NULL means do not test
  # iter.max: maximum number of kmeans iterations (default in kmeans algorithm is 10)
  # nstart: number of random sets chosen for kmeans (default in kmeans algorithm is 1)
  # returnList: boolean, if FALSE just the cluster object is returned
  #             if TRUE, a list is returned with dfCluster and the cluster object
  # seed: set the seed to this value (NULL means no seed)
  # Extract key information (cases/deaths by state and time bucket per shapeFunc)
  df <- df %>%
    select_at(vars(all_of(c("date", "state", caseVar, deathVar)))) %>%
    purrr::set_names(c("date", "state", "cases", "deaths")) %>%
    mutate(timeBucket=shapeFunc(date)) %>%
    group_by(state, timeBucket) %>%
    summarize(cases=sum(cases), deaths=sum(deaths)) %>%
    ungroup()
  # Limit to only relevant time buckets if requested
  if (!is.null(minShape)) {
    df <- df %>%
      filter(timeBucket >= minShape)
  }
  # Extract an aggregate by state, scaled so that total deaths carry ratioDeathvsCase
  # times the standard deviation of total cases
  dfAgg <- df %>%
    group_by(state) %>%
    summarize(totalCases=sum(cases), totalDeaths=sum(deaths)) %>%
    ungroup() %>%
    mutate(totalDeaths=ratioDeathvsCase*totalDeaths*sd(totalCases)/sd(totalDeaths))
  # Extract the percentages (shapes) by time bucket, scaled so that they have the proper ratio;
  # the minDeath/minCase floors damp the shape contribution of states with very small totals
  dfShape <- df %>%
    pivot_longer(-c(state, timeBucket)) %>%
    group_by(state, name) %>%
    mutate(tot=pmax(sum(value), ifelse(name=="deaths", minDeath, minCase)),
           value=ifelse(name=="deaths", ratioDeathvsCase, 1) * value / tot) %>%
    select(-tot) %>%
    pivot_wider(state, names_from=c(name, timeBucket), values_from=value) %>%
    ungroup()
  # Helper to calculate the sum of the per-column SDs (state column excluded)
  calcSumSD <- function(df) {
    df %>%
      ungroup() %>%
      select(-state) %>%
      summarize_all(.funs=sd) %>%
      as.vector() %>%
      sum()
  }
  # Down-weight the aggregate data so that there is the proper sum of sd in aggregates and shapes
  aggSD <- calcSumSD(dfAgg)
  shapeSD <- calcSumSD(dfShape)
  dfAgg <- dfAgg %>%
    mutate_if(is.numeric, ~. * ratioTotalvsShape * shapeSD / aggSD)
  # Combine so there is one row per state
  dfCluster <- dfAgg %>%
    inner_join(dfShape, by="state")
  # Convert 'state' to rowname so the clustering output is labeled by state
  keyData <- dfCluster %>% column_to_rownames("state")
  # Create hierarchical segments or kmeans segments
  if (hierarchical) {
    objCluster <- hclust(dist(keyData), method=hierMethod)
    plot(objCluster)
  } else {
    # Create an elbow plot if testCenters is not NULL
    if (!is.null(testCenters)) {
      helperElbow(keyData, testCenters=testCenters, iter.max=iter.max, nstart=nstart, silhouette=TRUE)
    }
    # Create the kmeans cluster object, setting a seed if requested
    if (!is.null(seed)) set.seed(seed)
    objCluster <- kmeans(keyData, centers=nCenters, iter.max=iter.max, nstart=nstart)
    # Print a table whose first row is the cluster sizes and remaining rows the cluster
    # means (the assignment to a throwaway n just captures print()'s returned value)
    cat("\nCluster means and counts\n")
    n=objCluster$size %>% cbind(objCluster$centers) %>% round(2) %>% t() %>% print()
  }
  # Return the data and object in a list if returnList is TRUE, otherwise return only the clustering object
  if (!isTRUE(returnList)) {
    objCluster
  } else {
    list(objCluster=objCluster, dfCluster=dfCluster)
  }
}
# Test clusters that weight deaths heavily vs. cases and that weight shape more highly than total
# (hierarchical clustering is the default, so this prints a dendrogram and returns the hclust object)
testCluster <- clusterStates(cvFilteredPerCapita,
                             minShape=3,
                             ratioDeathvsCase = 5,
                             ratioTotalvsShape = 0.5,
                             minDeath=100,
                             minCase=10000
                             )
The clusters can then be assessed against several criteria:
# Helper function to assess 30-day change vs. total
helperRecentvsTotal <- function(df,
                                xVar,
                                yVar,
                                title,
                                recencyDays=30,
                                ablineSlope=0.5,
                                mapper=varMapper,
                                labelPlot=TRUE,
                                printPlot=TRUE
                                ) {
  # FUNCTION ARGUMENTS:
  # df: the tibble containing data by state by day
  # xVar: the x-variable
  # yVar: the y-variable
  # title: the plot title
  # recencyDays: number of days to consider as recent
  # ablineSlope: dashed line will be drawn with this slope and intercept 0
  # mapper: mapping file to convert x/y variables to descriptive axes (named vector "variable"="label")
  # labelPlot: boolean, whether to show the labels for each point
  # printPlot: boolean, whether to display the plot (if FALSE, plot object is returned)
  # Determine the date cutoff: the most recent recencyDays days count as "recent"
  dateCutoff <- df %>% pull(date) %>% max() - recencyDays + 1
  cat("\nRecency is defined as", format(dateCutoff, "%Y-%m-%d"), "through current\n")
  # Create the plot; the new* variables zero out pre-cutoff days so that the
  # grouped sum below yields the recent-period total for each state
  p1 <- df %>%
    mutate(newCases=ifelse(date >= dateCutoff, cases, 0),
           newDeaths=ifelse(date >= dateCutoff, deaths, 0),
           newcpm=ifelse(date >= dateCutoff, cpm, 0),
           newdpm=ifelse(date >= dateCutoff, dpm, 0)
           ) %>%
    group_by(state, cluster) %>%
    summarize_if(is.numeric, .funs=sum) %>%
    ungroup() %>%
    ggplot(aes_string(x=xVar, y=yVar)) +
    labs(x=mapper[xVar],
         y=mapper[yVar],
         title=title,
         subtitle=paste0("Dashed line represents ",
                         round(100*ablineSlope),
                         "% of total is new in last ",
                         recencyDays,
                         " days"
                         )
         ) +
    geom_abline(lty=2, slope=ablineSlope) +
    lims(x=c(0, NA), y=c(0, NA)) +
    theme(legend.position = "bottom")
  # Add the appropriate geom (labeled points for states, plain scatterplot if labelPlot is FALSE)
  if (labelPlot) p1 <- p1 + geom_text(aes(color=cluster, label=state))
  else p1 <- p1 + geom_point(aes(color=cluster), alpha=0.5)
  if (isTRUE(printPlot)) {
    print(p1)
  } else {
    p1
  }
}
# Function to plot cluster vs. individual elements on a key metric
helperTotalvsElements <- function(df,
                                  keyVar,
                                  title,
                                  aggAndTotal=TRUE,
                                  pctRibbon=0.8,
                                  aggFunc=if(aggAndTotal) median else mean,
                                  mapper=varMapper,
                                  facetScales="free_y",
                                  printPlot=TRUE
                                  ) {
  # FUNCTION ARGUMENTS:
  # df: the data frame containing n-day rolling averages
  # keyVar: the variable to be plotted
  # title: the plot title
  # aggAndTotal: boolean, whether to plot every individual observation along with the cluster aggregate
  # pctRibbon: if aggAndTotal is FALSE, a ribbon covering this percentage of the data will be plotted
  # aggFunc: how to aggregate the elements to the segment
  #          CAUTION that this is an aggregate of averages, rather than a population-weighted aggregate
  # mapper: the variable mapping file to get the appropriate label for keyVar
  # facetScales: the scaling for the facets - "free_y" to let them all float, "fixed" to have them the same
  # printPlot: boolean, if TRUE print the plot (otherwise return the plot object)
  # Create an appropriate subtitle
  subtitle <- if(facetScales=="free_y") {
    "Caution that each facet has its own y axis with different scales"
  } else if (facetScales=="fixed") {
    "All facets are on the same scale"
  } else {
    "Update subtitle code in function helperTotalvsElements"
  }
  # Create an appropriate caption
  caption <- if(aggAndTotal) {
    "Cluster aggregate is median, weighting each observation equally\n(NOT population-weighted)"
  } else {
    paste0("1. Cluster aggregate is mean over all observations (NOT population-weighted)\n2. Ribbons reflect range covering ", round(pctRibbon*100), "% of observations")
  }
  # Create the plots for segment-level data.
  # rbind-ing a copy of the data with state relabeled as "cluster" makes the grouped
  # summarize produce a cluster-level aggregate series alongside the per-state series;
  # the formula (~) data arguments to geom_line then split the two apart.
  p1 <- df %>%
    rbind(mutate(., state="cluster")) %>%
    group_by(state, cluster, date) %>%
    summarize_at(vars(all_of(keyVar)), .funs=aggFunc) %>%
    ungroup() %>%
    filter(!is.na(get(keyVar))) %>%
    ggplot(aes_string(x="date")) +
    geom_line(data=~filter(., state == "cluster"),
              aes(y=get(keyVar), group=state, color=cluster),
              lwd=1.5
              ) +
    facet_wrap(~cluster, scales=facetScales) +
    labs(x="",
         y=mapper[keyVar],
         title=title,
         subtitle=subtitle,
         caption=caption
         ) +
    ylim(c(0, NA)) +
    theme(legend.position="bottom")
  # Add an appropriate individual metric (either every observation or quantiles)
  if (aggAndTotal) {
    p1 <- p1 +
      geom_line(data=~filter(., state != "cluster"),
                aes(y=get(keyVar), group=state),
                alpha=0.25
                )
  } else {
    # Ribbon bounds cover the central pctRibbon share of observations per cluster-date
    dfRibbon <- df %>%
      filter(!is.na(get(keyVar))) %>%
      group_by(cluster, date) %>%
      summarize(ylow=quantile(get(keyVar), 0.5-0.5*pctRibbon),
                yhigh=quantile(get(keyVar), 0.5+0.5*pctRibbon)
                )
    p1 <- p1 +
      geom_ribbon(data=dfRibbon,
                  aes(ymin=ylow, ymax=yhigh),
                  alpha=0.25
                  )
  }
  # Print plot if requested, otherwise return it
  if (isTRUE(printPlot)) {
    print(p1)
  } else {
    p1
  }
}
# Function to assess clusters
assessClusters <- function(clusters,
                           dfState=stateData,
                           dfBurden=cvFilteredPerCapita,
                           thruLabel="Aug 20, 2020",
                           isCounty=FALSE,
                           plotsTogether=FALSE,
                           clusterPlotsTogether=plotsTogether,
                           recentTotalTogether=plotsTogether,
                           clusterAggTogether=plotsTogether
                           ) {
  # FUNCTION ARGUMENTS:
  # clusters: the named vector containing the clusters by state
  # dfState: the file containing the states and populations
  # dfBurden: the data containing the relevant per capita burden statistics by state-date
  # thruLabel: label for plots for 'data through'
  # isCounty: boolean, is this a plot of county-level data that have been named 'state'?
  #           FALSE means that it is normal state-level data
  # plotsTogether: boolean, should plots be consolidated on fewer pages?
  # clusterPlotsTogether: boolean, should plots p1-p4 be consolidated?
  # recentTotalTogether: boolean, should recent total plots p7-p8 be consolidated?
  # clusterAggTogether: boolean, should aggregate plots p9/p11 and p10/p12 be consolidated?
  # (there is no p5/p6; those slots are the two helperBarDeathsCases calls, which print themselves)
  # Attach the clusters to the state population data
  dfState <- as.data.frame(clusters) %>%
    set_names("cluster") %>%
    rownames_to_column("state") %>%
    inner_join(dfState, by="state") %>%
    mutate(cluster=factor(cluster))
  # Plot the segments on a state map (or, for county data, a bar chart of membership counts)
  if (isCounty) {
    p1 <- dfState %>%
      count(cluster) %>%
      ggplot(aes(x=fct_rev(cluster), y=n)) +
      geom_col(aes(fill=cluster)) +
      geom_text(aes(y=n/2, label=n)) +
      coord_flip() +
      labs(x="", y="# Counties", title="Membership by segment")
  } else {
    p1 <- usmap::plot_usmap(regions="states", data=dfState, values="cluster") +
      scale_fill_discrete("cluster") +
      theme(legend.position="right")
  }
  # Plot the population totals by cluster (2015 population, in millions)
  p2 <- dfState %>%
    group_by(cluster) %>%
    summarize(pop=sum(pop)/1000000) %>%
    ggplot(aes(x=fct_rev(cluster), y=pop)) +
    geom_col(aes(fill=cluster)) +
    geom_text(aes(y=pop/2, label=round(pop))) +
    labs(y="2015 population (millions)", x="Cluster", title="Population by cluster (millions)") +
    coord_flip()
  # Join the cluster assignments onto the per-state burden data for plotting
  dfPlot <- dfState %>%
    inner_join(dfBurden, by="state") %>%
    tibble::as_tibble()
  # Plot the rolling 7-day mean daily disease burden by cluster
  p3 <- dfPlot %>%
    select(date, cluster, cases=cpm7, deaths=dpm7) %>%
    pivot_longer(-c(date, cluster)) %>%
    filter(!is.na(value)) %>%
    group_by(date, cluster, name) %>%
    summarize(value=median(value)) %>%
    ggplot(aes(x=date, y=value)) +
    geom_line(aes(group=cluster, color=cluster)) +
    facet_wrap(~name, scales="free_y") +
    labs(x="",
         y="Rolling 7-day mean, per million",
         title="Rolling 7-day mean daily disease burden, per million",
         subtitle="Median per day for states assigned to cluster"
         )
  # Plot the total cases and total deaths by cluster
  p4 <- dfPlot %>%
    group_by(cluster) %>%
    summarize(cases=sum(cases), deaths=sum(deaths)) %>%
    pivot_longer(-cluster) %>%
    ggplot(aes(x=fct_rev(cluster), y=value/1000)) +
    geom_col(aes(fill=cluster)) +
    geom_text(aes(y=value/2000, label=round(value/1000))) +
    coord_flip() +
    facet_wrap(~varMapper[name], scales="free_x") +
    labs(x="Cluster", y="Burden (000s)", title="Total cases and deaths by segment")
  # Place plots p1-p4 together if clusterPlotsTogether is TRUE, otherwise just print
  if (isTRUE(clusterPlotsTogether)) {
    gridExtra::grid.arrange(p1, p2, p3, p4, nrow=2, ncol=2)
  } else {
    print(p1); print(p2); print(p3); print(p4)
  }
  # These are relevant and useful only for state-level data
  if (!isCounty) {
    # Plot total cases and total deaths by state, colored by cluster
    helperBarDeathsCases(dfPlot,
                         numVars=c("cases", "deaths"),
                         title=paste0("Coronavirus impact by state through ", thruLabel),
                         xVar=c("state"),
                         fillVar=c("cluster")
                         )
    # Plot cases per million and deaths per million by state, colored by cluster
    helperBarDeathsCases(dfPlot,
                         numVars=c("cpm", "dpm"),
                         title=paste0("Coronavirus impact by state through ", thruLabel),
                         xVar=c("state"),
                         fillVar=c("cluster")
                         )
  }
  # County-level plots will be point-only; state-level plots will be labelled
  # Plot last-30 vs total for cases per million by state, colored by cluster
  p7 <- helperRecentvsTotal(dfPlot,
                            xVar="cpm",
                            yVar="newcpm",
                            title=paste0("Coronavirus burden through ", thruLabel),
                            labelPlot=!isCounty,
                            printPlot=FALSE
                            )
  # Plot last-30 vs total for deaths per million by state, colored by cluster
  p8 <- helperRecentvsTotal(dfPlot,
                            xVar="dpm",
                            yVar="newdpm",
                            title=paste0("Coronavirus burden through ", thruLabel),
                            labelPlot=!isCounty,
                            printPlot=FALSE
                            )
  # Print the plots either as a single page or separately
  if (isTRUE(recentTotalTogether)) {
    gridExtra::grid.arrange(p7, p8, nrow=1)
  } else {
    print(p7); print(p8)
  }
  # These are currently only helpful for states (update later to make more useful for counties)
  # Plot the cases per million on a free y-scale and a fixed y-scale
  p9 <- helperTotalvsElements(dfPlot,
                              keyVar="cpm7",
                              aggAndTotal=!isCounty,
                              title="Cases per million, 7-day rolling mean",
                              printPlot=FALSE
                              )
  p10 <- helperTotalvsElements(dfPlot,
                               keyVar="cpm7",
                               aggAndTotal=!isCounty,
                               title="Cases per million, 7-day rolling mean",
                               facetScales="fixed",
                               printPlot=FALSE
                               )
  # Plot the deaths per million on a free y-scale and a fixed y-scale
  p11 <- helperTotalvsElements(dfPlot,
                               keyVar="dpm7",
                               aggAndTotal=!isCounty,
                               title="Deaths per million, 7-day rolling mean",
                               printPlot=FALSE
                               )
  p12 <- helperTotalvsElements(dfPlot,
                               keyVar="dpm7",
                               aggAndTotal=!isCounty,
                               title="Deaths per million, 7-day rolling mean",
                               facetScales="fixed",
                               printPlot=FALSE
                               )
  if (isTRUE(clusterAggTogether)) {
    gridExtra::grid.arrange(p9, p11, nrow=1)
    gridExtra::grid.arrange(p10, p12, nrow=1)
  } else {
    print(p9); print(p10); print(p11); print(p12)
  }
  # Return the plotting data frame
  dfPlot
}
# Check how 5 clusters look: the tree is cut at k=6 and Vermont (which lands in
# its own cluster) is arbitrarily reassigned to the same cluster as New Hampshire,
# leaving 5 distinct clusters
clustVec <- cutree(testCluster, k=6)
clustVec["VT"] <- clustVec["NH"]
plotData <- assessClusters(clustVec)
##
## Recency is defined as 2020-07-22 through current
##
## Recency is defined as 2020-07-22 through current
At a glance, the segments appear reasonable.
The full process can then all be run in one place:
# Extract state data (state names and populations)
stateData <- getStateData()
# Load and process CV data (read the raw download; prints the column spec and a glimpse)
cvFull <- readCVData("./RInputFiles/Coronavirus/CV_downloaded_200820.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## state = col_character(),
## dataQualityGrade = col_character(),
## lastUpdateEt = col_character(),
## dateModified = col_datetime(format = ""),
## checkTimeEt = col_character(),
## dateChecked = col_datetime(format = ""),
## hash = col_character(),
## grade = col_logical()
## )
## See spec(...) for full column specifications.
## Observations: 9,449
## Variables: 53
## $ date <date> 2020-08-20, 2020-08-20, 2020-08-20, 20...
## $ state <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO...
## $ positive <dbl> 5332, 112449, 54765, 0, 196280, 644751,...
## $ negative <dbl> 307315, 784330, 593744, 1514, 922163, 9...
## $ pending <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ hospitalizedCurrently <dbl> 51, 1105, 499, NA, 1070, 6212, 238, 47,...
## $ hospitalizedCumulative <dbl> NA, 13330, 3790, NA, 21143, NA, 6784, 1...
## $ inIcuCurrently <dbl> NA, NA, NA, NA, 388, 1707, NA, NA, 26, ...
## $ inIcuCumulative <dbl> NA, 1348, NA, NA, NA, NA, NA, NA, NA, N...
## $ onVentilatorCurrently <dbl> 6, NA, 108, NA, 233, NA, NA, NA, 12, NA...
## $ onVentilatorCumulative <dbl> NA, 734, 488, NA, NA, NA, NA, NA, NA, N...
## $ recovered <dbl> 1513, 44684, 48458, NA, 28471, NA, 5759...
## $ dataQualityGrade <chr> "A", "B", "A", "C", "A+", "B", "A", "B"...
## $ lastUpdateEt <chr> "8/20/2020 0:00", "8/20/2020 11:00", "8...
## $ dateModified <dttm> 2020-08-20 00:00:00, 2020-08-20 11:00:...
## $ checkTimeEt <chr> "8/19/2020 20:00", "8/20/2020 7:00", "8...
## $ death <dbl> 29, 1974, 641, 0, 4684, 11686, 1800, 44...
## $ hospitalized <dbl> NA, 13330, 3790, NA, 21143, NA, 6784, 1...
## $ dateChecked <dttm> 2020-08-20 00:00:00, 2020-08-20 11:00:...
## $ totalTestsViral <dbl> 312647, 891813, 648509, NA, 1116897, 10...
## $ positiveTestsViral <dbl> 4970, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ negativeTestsViral <dbl> 307360, NA, 593744, NA, NA, NA, NA, NA,...
## $ positiveCasesViral <dbl> 5332, 107483, 54765, 0, 194734, 644751,...
## $ deathConfirmed <dbl> 29, 1905, NA, NA, 4429, NA, NA, 3572, N...
## $ deathProbable <dbl> NA, 69, NA, NA, 255, NA, NA, 886, NA, 6...
## $ totalTestEncountersViral <dbl> NA, NA, NA, NA, NA, NA, 895207, NA, NA,...
## $ totalTestsPeopleViral <dbl> NA, NA, NA, 1514, NA, NA, 645170, NA, 2...
## $ totalTestsAntibody <dbl> NA, NA, NA, NA, 255456, NA, 150931, NA,...
## $ positiveTestsAntibody <dbl> NA, NA, NA, NA, NA, NA, 10406, NA, NA, ...
## $ negativeTestsAntibody <dbl> NA, NA, NA, NA, NA, NA, 140525, NA, NA,...
## $ totalTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ negativeTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsPeopleAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsAntigen <dbl> NA, NA, 10358, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ fips <dbl> 2, 1, 5, 60, 4, 6, 8, 9, 11, 10, 12, 13...
## $ positiveIncrease <dbl> 85, 971, 549, 0, 723, 5920, 270, 118, 5...
## $ negativeIncrease <dbl> 1713, 10462, 6680, 0, 6481, 81363, 4657...
## $ total <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ totalTestResults <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ totalTestResultsIncrease <dbl> 1798, 11433, 7229, 0, 7204, 87283, 7348...
## $ posNeg <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ deathIncrease <dbl> 0, 30, 10, 0, 50, 163, 12, 1, 1, 0, 119...
## $ hospitalizedIncrease <dbl> 0, 250, 47, 0, 123, 0, 3, 72, 0, 0, 450...
## $ hash <chr> "c83a1d575a597788adccbe170950b8d197754b...
## $ commercialScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeRegularScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ positiveScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ score <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ grade <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
##
## File is unique by: date state and has dimensions: 9449 53
cvFiltered <- processCVData(cvFull)
##
##
## Control totals - note that validState other than TRUE will be discarded
##
## # A tibble: 2 x 4
## validState cases deaths n
## <lgl> <dbl> <dbl> <dbl>
## 1 FALSE 29761 385 790
## 2 TRUE 5516295 165742 8659
# Add per capita metrics and 7-day rolling means
cvFilteredPerCapita <- helperMakePerCapita(cvFiltered, k=7)
# Run state-level assessments
assessStateData(cvFilteredPerCapita)
# Test clusters that weight deaths heavily vs. cases and that weight shape more highly than total
testCluster <- clusterStates(cvFilteredPerCapita,
                             minShape=3,
                             ratioDeathvsCase = 5,
                             ratioTotalvsShape = 0.5,
                             minDeath=100,
                             minCase=10000
                             )
# Check how 6 clusters look: cut at k=7, with Vermont arbitrarily reassigned to the
# same cluster as New Hampshire (leaving 6 distinct clusters)
clustVec2 <- cutree(testCluster, k=7)
clustVec2["VT"] <- clustVec2["NH"]
# Create the cluster assessments
plotData2 <- assessClusters(clustVec2)
##
## Recency is defined as 2020-07-22 through current
##
## Recency is defined as 2020-07-22 through current
The process is easy to run and update now that it is in functional form. An exploration is made for 6 segments, which allows for a bucket of LA, DC, RI. These are states that had meaningful early disease spikes (though less than the main high-spike-early cluster) and also meaningful late disease spikes (though less than the main high-spike-late cluster). Findings include:
The clusterStates() function is updated in two ways:
An example is run using k-means, with 2 segments (the clear best mean silhouette width given these parameters):
# Test clusters that weight deaths heavily vs. cases and that weight shape more highly than total
# Using kmeans and testing for 1-10 clusters (elbow + silhouette plots); the seed makes
# the random kmeans starts reproducible, and returnList=TRUE keeps the clustering input data
testCluster_km2 <- clusterStates(cvFilteredPerCapita,
                                 minShape=3,
                                 ratioDeathvsCase = 5,
                                 ratioTotalvsShape = 0.5,
                                 minDeath=100,
                                 minCase=10000,
                                 hierarchical=FALSE,
                                 nCenters=2,
                                 testCenters=1:10,
                                 iter.max=20,
                                 nstart=10,
                                 returnList=TRUE,
                                 seed=2008261350
                                 )
##
## Cluster means and counts
## 1 2
## . 12.00 39.00
## totalCases 0.75 0.57
## totalDeaths 3.67 1.02
## cases_3 0.06 0.02
## deaths_3 0.14 0.12
## cases_4 0.34 0.09
## deaths_4 2.02 0.95
## cases_5 0.24 0.11
## deaths_5 1.72 0.99
## cases_6 0.10 0.14
## deaths_6 0.61 0.71
## cases_7 0.15 0.33
## deaths_7 0.34 1.06
## cases_8 0.10 0.19
## deaths_8 0.18 0.90
# Check how 2 clusters look (kmeans cluster assignments are a named vector by state)
clustVec_km2 <- testCluster_km2$objCluster$cluster
# Create the cluster assessments
plotData_km2 <- assessClusters(clustVec_km2)
##
## Recency is defined as 2020-07-22 through current
##
## Recency is defined as 2020-07-22 through current
Given the criteria that deaths matter much more than cases and that aggregate matters more than shape, the main clustering distinction is the 11 states plus DC that had early, heavy disease. While this produces the best mean silhouette width, it appears to be missing the distinction of states with a later spike. The elbow plot is consistent with this, as there is no obvious break where within-sum-squares meaningfully stops decreasing. Suppose that 5 segments are created, with the intent of splitting high/low deaths and early/late spikes:
# Test clusters that weight deaths heavily vs. cases and that weight shape more highly than total
# Using kmeans with 5 centers, testing for 1-10 clusters (same weighting as above, new seed)
testCluster_km5 <- clusterStates(cvFilteredPerCapita,
                                 minShape=3,
                                 ratioDeathvsCase = 5,
                                 ratioTotalvsShape = 0.5,
                                 minDeath=100,
                                 minCase=10000,
                                 hierarchical=FALSE,
                                 nCenters=5,
                                 testCenters=1:10,
                                 iter.max=20,
                                 nstart=10,
                                 returnList=TRUE,
                                 seed=2008261400
                                 )
##
## Cluster means and counts
## 1 2 3 4 5
## . 10.00 14.00 9.00 14.00 4.00
## totalCases 0.28 0.86 0.71 0.50 0.78
## totalDeaths 0.41 1.42 2.73 0.99 5.32
## cases_3 0.03 0.01 0.05 0.02 0.10
## deaths_3 0.27 0.08 0.13 0.06 0.15
## cases_4 0.07 0.06 0.26 0.12 0.50
## deaths_4 1.27 0.63 1.73 1.00 2.58
## cases_5 0.06 0.07 0.24 0.19 0.23
## deaths_5 0.69 0.66 1.77 1.50 1.59
## cases_6 0.08 0.18 0.11 0.15 0.07
## deaths_6 0.42 0.61 0.70 1.01 0.44
## cases_7 0.23 0.44 0.21 0.28 0.06
## deaths_7 0.70 1.60 0.42 0.81 0.18
## cases_8 0.16 0.22 0.14 0.19 0.04
## deaths_8 0.70 1.38 0.25 0.61 0.06
# Check how 5 clusters look
clustVec_km5 <- testCluster_km5$objCluster$cluster
# Create the cluster assessments
plotData_km5 <- assessClusters(clustVec_km5)
##
## Recency is defined as 2020-07-22 through current
##
## Recency is defined as 2020-07-22 through current
The clusters appear very similar to those created using hierarchical clustering. A comparison of the segments assigned is made:
# Cross-tabulate the cluster assignments from the four runs (hierarchical with 5 and
# 6 effective clusters, kmeans with 2 and 5) to see how often they agree
tibble::tibble(state=names(clustVec),
               hier5=clustVec,
               hier6=clustVec2,
               km2=clustVec_km2,
               km5=clustVec_km5
               ) %>%
  count(hier6, hier5, km2, km5)
## # A tibble: 10 x 5
## hier6 hier5 km2 km5 n
## <int> <int> <int> <int> <int>
## 1 1 1 2 1 6
## 2 1 1 2 2 6
## 3 1 1 2 4 1
## 4 2 2 2 2 8
## 5 3 3 2 1 4
## 6 3 3 2 4 13
## 7 4 4 1 5 4
## 8 5 5 1 3 3
## 9 6 5 1 3 5
## 10 6 5 2 3 1
States are sufficiently differentiated, and the method sufficiently focused on deaths and aggregates, such that the clustering techniques produce similar results. There are many states that are near the edges of the clusters, and the choice of metrics and even random seeds will drive their assignments. Provided there are enough segments, there appears to typically be 1) at least one segment of early and heavy disease, 2) at least one segment of late and heavy disease, and 3) at least one segment of much lower than average disease. There is then some differentiation as to how the “early and heavy” and “lower than average” segments are identified and/or further subsetted.
The assessClusters() function is updated to put smaller versions of related plots all on a single page. Example usage is shown below:
# Create the cluster assessments
plotData_km5 <- assessClusters(clustVec_km5,
dfState=stateData,
dfBurden=cvFilteredPerCapita,
plotsTogether=TRUE
)
##
## Recency is defined as 2020-07-22 through current
##
## Recency is defined as 2020-07-22 through current
Hospitalization data is also included in the raw coronavirus file from The COVID Project:
# All fields contained in the raw CV file
names(cvData)
## [1] "date" "state"
## [3] "positive" "negative"
## [5] "pending" "hospitalizedCurrently"
## [7] "hospitalizedCumulative" "inIcuCurrently"
## [9] "inIcuCumulative" "onVentilatorCurrently"
## [11] "onVentilatorCumulative" "recovered"
## [13] "dataQualityGrade" "lastUpdateEt"
## [15] "dateModified" "checkTimeEt"
## [17] "death" "hospitalized"
## [19] "dateChecked" "totalTestsViral"
## [21] "positiveTestsViral" "negativeTestsViral"
## [23] "positiveCasesViral" "deathConfirmed"
## [25] "deathProbable" "totalTestEncountersViral"
## [27] "totalTestsPeopleViral" "totalTestsAntibody"
## [29] "positiveTestsAntibody" "negativeTestsAntibody"
## [31] "totalTestsPeopleAntibody" "positiveTestsPeopleAntibody"
## [33] "negativeTestsPeopleAntibody" "totalTestsPeopleAntigen"
## [35] "positiveTestsPeopleAntigen" "totalTestsAntigen"
## [37] "positiveTestsAntigen" "fips"
## [39] "positiveIncrease" "negativeIncrease"
## [41] "total" "totalTestResults"
## [43] "totalTestResultsIncrease" "posNeg"
## [45] "deathIncrease" "hospitalizedIncrease"
## [47] "hash" "commercialScore"
## [49] "negativeRegularScore" "negativeScore"
## [51] "positiveScore" "score"
## [53] "grade"
# Fields matching to 'hosp' or 'icu' or 'ventilator'
hospVars <- names(cvData) %>%
grep(x=., pattern="[Hh]osp|[Ii]cu|[Vv]entilator", value=TRUE) %>%
sort()
hospVars
## [1] "hospitalized" "hospitalizedCumulative" "hospitalizedCurrently"
## [4] "hospitalizedIncrease" "inIcuCumulative" "inIcuCurrently"
## [7] "onVentilatorCumulative" "onVentilatorCurrently"
Data are investigated for amount of ‘missingness’ by time period:
set.seed(2008281323)
cvData %>%
select_at(vars(all_of(c("state", "date", hospVars)))) %>%
sample_n(20) %>%
purrr::set_names(c("state", "date",
"hosp", "hospCum", "hospCur", "hospInc",
"icuCum", "icuCur", "ventCum", "ventCur"
)
) %>%
arrange(date)
## # A tibble: 20 x 10
## state date hosp hospCum hospCur hospInc icuCum icuCur ventCum ventCur
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 WA 2020-03-07 NA NA NA 0 NA NA NA NA
## 2 UT 2020-03-13 NA NA NA 0 NA NA NA NA
## 3 CO 2020-03-24 72 72 116 14 NA NA NA NA
## 4 CT 2020-04-10 NA NA 1562 0 NA NA NA NA
## 5 ME 2020-04-13 124 124 22 4 NA NA NA NA
## 6 AK 2020-04-15 34 34 NA 2 NA NA NA NA
## 7 DE 2020-05-05 NA NA 284 0 NA NA NA NA
## 8 NJ 2020-05-16 NA NA 3564 0 NA 1061 NA 846
## 9 MS 2020-05-18 1805 1805 559 32 NA 141 NA 75
## 10 CO 2020-05-28 4196 4196 464 36 NA NA NA NA
## 11 NE 2020-06-23 1234 1234 135 22 NA NA NA NA
## 12 WI 2020-06-29 3407 3407 237 14 747 90 NA NA
## 13 VT 2020-06-29 NA NA 13 0 NA NA NA NA
## 14 MP 2020-07-03 NA NA NA 0 NA NA NA NA
## 15 WA 2020-07-09 4630 4630 329 48 NA NA NA 59
## 16 SD 2020-07-31 824 824 31 9 NA NA NA NA
## 17 ID 2020-08-05 906 906 195 20 260 39 NA NA
## 18 MO 2020-08-07 NA NA 930 0 NA NA NA 113
## 19 MI 2020-08-12 NA NA 640 0 NA 185 NA 84
## 20 IA 2020-08-16 NA NA 271 0 NA 80 NA 34
Missing data appears to be common, and not always reflective of zero. There is at least some directional evidence that the hospitalized currently variable has been coming online and that the hospitalized increase variable uses 0 for both NA and ‘no increase’. This would be problematic for using any of the variables (potentially other than hospCur) for cross-state comparisons. An analysis is run to see the frequency of NA by variable by date:
# Not NA data
# For each date, count how many of the 50 states + DC report a non-NA value
# for each hospitalization/ICU/ventilator metric.
# nState starts as 1 per row, so after summing it equals the number of
# state records present on that date (the denominator for "how many report").
notNADate <- cvData %>%
filter(state %in% c(state.abb, "DC")) %>%
select_at(vars(all_of(c("state", "date", hospVars)))) %>%
mutate(nState=1) %>%
group_by(date) %>%
summarize_if(is.numeric, .funs=function(x) { sum(!is.na(x))}) %>%
ungroup()
# Evolution of Not NA states by time
# Lines are ordered/colored by each metric's maximum so the legend matches the plot
notNADate %>%
pivot_longer(-date) %>%
ggplot(aes(x=date,
y=value,
group=fct_rev(fct_reorder(name, value, .fun=max)),
color=fct_rev(fct_reorder(name, value, .fun=max))
)
) +
geom_line(lwd=1) +
# Dashed reference line at 51 = all 50 states + DC reporting
geom_hline(yintercept=51, lty=2) +
labs(x="", y="Number of states with non-NA data", title="Evolution of data availability by metric") +
scale_color_discrete("") +
scale_x_date(date_breaks="1 months", date_labels="%m")
# Confirmation that hospitalized increase and nState are identical
# (0 means hospitalizedIncrease is non-NA on every record, i.e. NAs were zero-filled upstream)
sum(notNADate$hospitalizedIncrease != notNADate$nState)
## [1] 0
The plots confirm the meaningful gaps in the hospitalization, ICU, and ventilator data. Further, hospitalized increase exists and is non-missing for every case where there is a record (nState), suggesting that this metric has already had a filter such as ifelse(is.na(x), 0, x) applied. The only data that appears to grow from NA to potentially stable is ‘hospitalized currently’, which has become valid in all 51 states as of August.
General data availability by metric is:
# Not NA data
# For each state-month, flag whether each metric is fully reported:
# min(!is.na(x)) is 1 only when EVERY daily value for that state-month is non-NA,
# and 0 if any day is missing.
notNADateState <- cvData %>%
filter(state %in% c(state.abb, "DC")) %>%
select_at(vars(all_of(c("state", "date", hospVars)))) %>%
mutate(nState=1, month=lubridate::month(date)) %>%
group_by(month, state) %>%
summarize_if(is.numeric, .funs=function(x) { min(!is.na(x))}) %>%
ungroup()
# Evolution of Not NA states by month
# Heatmap: green tile = metric fully reported for that state-month, white = gaps.
# nState and hospitalizedIncrease are dropped since they are always non-NA (see above).
notNADateState %>%
pivot_longer(-c(state, month)) %>%
filter(!(name %in% c("nState", "hospitalizedIncrease"))) %>%
ggplot(aes(y=fct_reorder(state, value, .fun=sum), x=month)) +
geom_tile(aes(fill=value)) +
labs(x="", y="", title="Evolution of data availability by metric") +
scale_fill_continuous("", low="white", high="green") +
facet_wrap(~name, nrow=1)
# States missing hospitalizedCurrently as of May 1
notNADateState %>%
filter(hospitalizedCurrently != 1, month >= 5)
## # A tibble: 17 x 11
## month state hospitalized hospitalizedCum~ hospitalizedCur~ hospitalizedInc~
## <dbl> <chr> <int> <int> <int> <int>
## 1 5 DC 0 0 0 1
## 2 5 FL 1 1 0 1
## 3 5 HI 1 1 0 1
## 4 5 KS 1 1 0 1
## 5 5 NE 0 0 0 1
## 6 5 NV 0 0 0 1
## 7 5 OH 1 1 0 1
## 8 5 OK 1 1 0 1
## 9 5 SC 1 1 0 1
## 10 5 UT 1 1 0 1
## 11 6 FL 1 1 0 1
## 12 6 HI 1 1 0 1
## 13 6 KS 1 1 0 1
## 14 6 NE 0 0 0 1
## 15 7 FL 1 1 0 1
## 16 7 HI 1 1 0 1
## 17 7 KS 1 1 0 1
## # ... with 5 more variables: inIcuCumulative <int>, inIcuCurrently <int>,
## # onVentilatorCumulative <int>, onVentilatorCurrently <int>, nState <int>
The hospitalized currently metric is fully complete as of August, and mostly complete as of June. Only data from Florida, Hawaii, Kansas, and Nebraska is missing, and all but Nebraska report data in ‘hospitalized’ for those time periods. How does the hospitalized data compare with the hospitalizedCurrently data for FL, HI, and KS?
# Hospitalized comparisons
# Show a window around each state's transition from NA to reported
# hospitalizedCurrently: keep rows where the value was still NA 10 days earlier
# but is non-NA 5 days later, i.e. roughly the 15 days bracketing first report.
cvData %>%
arrange(state, date) %>%
group_by(state) %>%
filter(state %in% c("FL", "HI", "KS"),
is.na(lag(hospitalizedCurrently, 10)),
!is.na(lead(hospitalizedCurrently, 5))
) %>%
select(date, state, contains("hosp")) %>%
as.data.frame()
## date state hospitalizedCurrently hospitalizedCumulative hospitalized
## 1 2020-07-05 FL NA 16201 16201
## 2 2020-07-06 FL NA 16352 16352
## 3 2020-07-07 FL NA 16733 16733
## 4 2020-07-08 FL NA 17068 17068
## 5 2020-07-09 FL NA 17479 17479
## 6 2020-07-10 FL 6974 17916 17916
## 7 2020-07-11 FL 7186 18341 18341
## 8 2020-07-12 FL 7542 18590 18590
## 9 2020-07-13 FL 8051 18817 18817
## 10 2020-07-14 FL 8354 19201 19201
## 11 2020-07-15 FL 8217 19659 19659
## 12 2020-07-16 FL 9112 20154 20154
## 13 2020-07-17 FL 8961 20526 20526
## 14 2020-07-18 FL 9144 20969 20969
## 15 2020-07-19 FL 9363 21309 21309
## 16 2020-07-09 HI NA 122 122
## 17 2020-07-10 HI NA 123 123
## 18 2020-07-11 HI NA 125 125
## 19 2020-07-12 HI NA 125 125
## 20 2020-07-13 HI NA 125 125
## 21 2020-07-14 HI 23 128 128
## 22 2020-07-15 HI 31 133 133
## 23 2020-07-16 HI 40 137 137
## 24 2020-07-17 HI 39 138 138
## 25 2020-07-18 HI 39 139 139
## 26 2020-07-19 HI 39 140 140
## 27 2020-07-20 HI 33 150 150
## 28 2020-07-21 HI 46 150 150
## 29 2020-07-22 HI 47 151 151
## 30 2020-07-23 HI 39 154 154
## 31 2020-07-20 KS NA 1497 1497
## 32 2020-07-21 KS NA 1497 1497
## 33 2020-07-22 KS NA 1545 1545
## 34 2020-07-23 KS NA 1545 1545
## 35 2020-07-24 KS NA 1596 1596
## 36 2020-07-25 KS 315 1596 1596
## 37 2020-07-26 KS 315 1596 1596
## 38 2020-07-27 KS 212 1644 1644
## 39 2020-07-28 KS 212 1644 1644
## 40 2020-07-29 KS 393 1700 1700
## 41 2020-07-30 KS 393 1700 1700
## 42 2020-07-31 KS 366 1751 1751
## 43 2020-08-01 KS 366 1751 1751
## 44 2020-08-02 KS 366 1751 1751
## 45 2020-08-03 KS 232 1782 1782
## hospitalizedIncrease
## 1 161
## 2 151
## 3 381
## 4 335
## 5 411
## 6 437
## 7 425
## 8 249
## 9 227
## 10 384
## 11 458
## 12 495
## 13 372
## 14 443
## 15 340
## 16 3
## 17 1
## 18 2
## 19 0
## 20 0
## 21 3
## 22 5
## 23 4
## 24 1
## 25 1
## 26 1
## 27 10
## 28 0
## 29 1
## 30 3
## 31 44
## 32 0
## 33 48
## 34 0
## 35 51
## 36 0
## 37 0
## 38 48
## 39 0
## 40 56
## 41 0
## 42 51
## 43 0
## 44 0
## 45 31
Prior to reporting hospitalizedCurrently, it appears that the hospitalized field and hospitalizedCumulative fields were identical for these states. And, hospitalizedIncrease appears to be the change in hospitalizedCumulative, which would be the number of people newly admitted to the hospital on that day (no reduction for any discharges/deaths on that day).
The lack of data will meaningfully complicate any cross-state comparisons, since some states did not report the same metrics (or at all) during times when their state had meaningful disease burden as shown by cases and deaths.
Since the ‘hospitalizedCurrently’ field is the most complete, a function is written to plot the per capita evolution of this metric by segment:
# Function to create plots of the number hospitalized by state and cluster
# Plots the 7-day rolling mean of currently-hospitalized per million, one facet
# per cluster, with per-state lines in gray and a per-cluster "Total" line in color.
plotHospitalized <- function(df,
clusterVector,
dfState=stateData,
subT=""
) {
# FUNCTION ARGUMENTS:
# df: a data frame or tibble containing 'state', 'date', 'hospitalizedCurrently'
# clusterVector: a named vector of form 'state'='cluster'
# dfState: a state-level population file containing 'state' and 'pop'
# subT: a subtitle for the plot
#
# RETURNS: the plotting data (state- and Total-level rows by cluster and date)
# SIDE EFFECTS: prints the plot
# Create the key plotting data
# NOTE: rows with NA hospitalizedCurrently are dropped BEFORE aggregation, so the
# "Total" line reflects only states reporting on each date (denominator shifts as
# states come online) — acceptable here given the availability analysis above.
plotData <- df %>%
inner_join(dfState, by="state") %>%
mutate(cluster=factor(clusterVector[state])) %>%
filter(!is.na(hospitalizedCurrently)) %>%
select(date, state, cluster, hospitalizedCurrently, pop) %>%
# Duplicate all rows relabeled state="Total" so the next group_by yields a
# per-cluster aggregate series alongside the per-state series
rbind(mutate(., state="Total")) %>%
group_by(state, cluster, date) %>%
summarize(n=n(),
hospitalizedCurrently=sum(hospitalizedCurrently),
pop=sum(pop)
) %>%
# hpm: hospitalized per million; hpm7: its 7-day rolling mean (computed
# within the surviving state/cluster groups)
mutate(hpm=1000000*hospitalizedCurrently/pop) %>%
helperRollingAgg(origVar="hpm", newName="hpm7") %>%
ungroup()
# Create the plot
p1 <- plotData %>%
filter(!is.na(hpm7)) %>%
ggplot(aes(x=date, y=hpm7)) +
geom_line(data=~filter(., state != "Total"), aes(group=state), alpha=0.25) +
geom_line(data=~filter(., state == "Total"), aes(group=state, color=cluster), lwd=1.5) +
facet_wrap(~cluster, scales="fixed") +
ylim(c(0, NA)) +
labs(x="",
y="Currently Hospitalized 7-day rolling mean (per million)",
title="Hospitalized per million by cluster",
subtitle=subT
)
print(p1)
# Return the plot data
plotData
}
# Create the hospitalized plot
dfHospital <- plotHospitalized(cvData, clusterVector=clustVec_km5, subT="Data through August 20")
The data show very similar patterns and shapes as when the segments were plotted using cases and deaths.
The hospital data can then be integrated to the existing data file with cases and deaths. Filling with NA for the hospitalized data is OK, so a left join is performed:
# This will drop the cluster aggregate that was created inside dfHospital
# (plotData_km5 has no state="Total" rows, so the left join keeps only true states)
metrics_km5 <- plotData_km5 %>%
left_join(select(dfHospital, -n, -pop), by=c("state", "cluster", "date"))
# Explore cluster-level totals for cases, deaths, hospitalizedCurrently
# Same rbind trick as plotHospitalized(): duplicate rows with state="cluster"
# so grouping produces per-cluster aggregate series alongside per-state series.
metrics_km5_plotData <- metrics_km5 %>%
select(state, cluster, date, pop, cases, deaths, hosp=hospitalizedCurrently) %>%
pivot_longer(-c(state, cluster, date, pop)) %>%
filter(!is.na(value)) %>%
rbind(mutate(., state="cluster")) %>%
group_by(state, cluster, date, name) %>%
summarize(value=sum(value), pop=sum(pop)) %>%
mutate(vpm=1000000*value/pop) %>%
arrange(state, cluster, name, date) %>%
group_by(state, cluster, name) %>%
helperRollingAgg(origVar="vpm", newName="vpm7")
# Create facetted plots for totals by metric by segment
metrics_km5_plotData %>%
filter(!is.na(vpm7)) %>%
ggplot(aes(x=date, y=vpm7)) +
geom_line(data=~filter(., state=="cluster"), aes(group=cluster, color=cluster), lwd=1.5) +
geom_line(data=~filter(., state!="cluster"), aes(group=state), alpha=0.25) +
facet_grid(name ~ cluster, scales="free_y") +
labs(x="",
y="Rolling 7-day mean per million",
title="Key metrics by cluster (7-day rolling mean per million)",
subtitle="Cases: new cases, Deaths: new deaths, Hospitalized: total in hospital (not new)"
) +
scale_x_date(date_breaks="1 months", date_labels="%b") +
theme(axis.text.x=element_text(angle=90))
# Create all-segment plot by metric (cluster aggregates only, overlaid per metric)
metrics_km5_plotData %>%
filter(!is.na(vpm7)) %>%
ggplot(aes(x=date, y=vpm7)) +
geom_line(data=~filter(., state=="cluster"), aes(group=cluster, color=cluster), lwd=1.5) +
facet_wrap(~ name, scales="free_y", nrow=1) +
labs(x="",
y="Rolling 7-day mean per million",
title="Key metrics by cluster (7-day rolling mean per million)",
subtitle="Cases: new cases, Deaths: new deaths, Hospitalized: total in hospital (not new)"
) +
scale_x_date(date_breaks="1 months", date_labels="%b") +
theme(axis.text.x=element_text(angle=90))
# Create all-metric plot by segment (define 100% as peak for segment-metric)
# Scaling each series to its own max allows cases/deaths/hosp timing comparison
metrics_km5_plotData %>%
filter(!is.na(vpm7)) %>%
group_by(state, cluster, name) %>%
mutate(spm7=vpm7/max(vpm7)) %>%
ggplot(aes(x=date, y=spm7)) +
geom_line(data=~filter(., state=="cluster"), aes(group=name, color=cluster, linetype=name), lwd=1) +
facet_wrap(~ cluster, scales="free_y") +
labs(x="",
y="% of Maximum",
title="Key metrics by cluster (% of maximum)",
subtitle="Cases: new cases, Deaths: new deaths, Hospitalized: total in hospital (not new)"
) +
scale_x_date(date_breaks="1 months", date_labels="%b") +
theme(axis.text.x=element_text(angle=90))
For the segments hit early (2 and 4), there was at most a small time difference between the peak for cases, hospitalizations, and deaths. This is potentially driven by very limited testing, with many diagnoses being made when patients already had advanced disease. Segment 4 has had a rebound in cases but without any rebound in hospitalizations or deaths, suggesting that the recent spike in cases may be due to increased testing.
For the primary segment being hit late (1), there appears to be a 2-4 week gap between the peak in cases and hospitalizations and the peak in deaths. This is potentially driven by a larger number of cases being found early due to increased testing.
Segments 3 and 5 are near their peaks for cases and hospitalizations, while segment 3 (but not segment 5) is also near its peak for deaths. These segments have currently had a low burden on a per million basis, and the evolution of disease burden in the following weeks or months is uncertain. Segment 3 across plots may be showing more indicia of a late spike like segment 1, while segment 5 across plots may be showing more indicia of a modest rebound in cases like segment 4.
The COVID Tracking Project offers an API for downloading the most recent data as JSON or CSV. A function is written to download the data to a specified file name, read in the data and check key control totals:
# Function to download data for COVID Tracking Project
downloadCOVIDbyState <- function(fileName,
api="https://api.covidtracking.com/v1/states/daily.csv",
ovrWrite=FALSE
) {
# COVID Tracking Project API allows data downloads for personal, non-commercial use
# https://covidtracking.com/data/api
# FUNCTION ARGUMENTS:
# fileName: the filename that the data will be saved to
# api: The API link for data downloads
# ovrWrite: whether to allow overwriting of the existing fileName
#
# RETURNS: a data.frame of file statistics (file.info) for the downloaded file
# Check whether fileName already exists; guard against accidentally clobbering
# a previously cached download unless ovrWrite is explicitly TRUE
if (file.exists(fileName)) {
cat("\nFile already exists at:", fileName, "\n")
if (ovrWrite) cat("Will over-write with current data from", api, "\n")
else stop("Exiting due to ovrWrite=FALSE and a duplicate fileName\n")
}
# Download the file; download.file() returns 0 on success and (for some
# methods) a non-zero status on failure, so check it explicitly rather than
# silently reporting stats on a possibly empty/partial file
dlStatus <- download.file(api, destfile=fileName)
if (dlStatus != 0) {
stop("Download from ", api, " failed with status code ", dlStatus, "\n")
}
# Show statistics on downloaded file
file.info(fileName)
}
The function is run on 2020-August-30, with results cached to avoid overwriting:
# Download the file with the August 30, 2020 data
downloadCOVIDbyState(fileName="./RInputFiles/Coronavirus/CV_downloaded_200830.csv")
## size isdir mode
## ./RInputFiles/Coronavirus/CV_downloaded_200830.csv 2478232 FALSE 666
## mtime
## ./RInputFiles/Coronavirus/CV_downloaded_200830.csv 2020-08-30 09:51:09
## ctime
## ./RInputFiles/Coronavirus/CV_downloaded_200830.csv 2020-08-30 09:51:05
## atime exe
## ./RInputFiles/Coronavirus/CV_downloaded_200830.csv 2020-08-30 09:51:09 no
A function can then be written to read the file, change the date field to type date, check uniqueness, and compare available fields, elements, and control totals to an existing file:
# Function to read, convert, and sanity check a downloaded file
readCOViDbyState <- function(fileName,
checkFile=cvData,
controlFields=c("positiveIncrease", "deathIncrease", "hospitalizedCurrently"),
controlBy=c("state")
) {
# FUNCTION ARGUMENTS:
# fileName: the file name for reading the data
# checkFile: a file that can be used for comparison purposes
# controlFields: fields that will be explicitly checked against checkFile
# controlBy: level of aggregation at which fields will be explicitly checked against checkFile
#
# RETURNS: the tibble read from fileName, with numeric 'date' converted to Date
# SIDE EFFECTS: prints comparison results and a control-totals-by-date plot
# Read in the file and convert the numeric date field to date using ymd format
df <- readr::read_csv(fileName) %>%
mutate(date=lubridate::ymd(date))
# Check that the file is unique by date-state (the expected grain of the data)
if ((df %>% select(date, state) %>% anyDuplicated()) != 0) {
stop("\nDuplicates by date and state, investigate and fix\n")
} else {
cat("\nFile is unique by state and date\n")
}
# Check for similarity of key elements
# (reports items found in only one of the two vectors, in either direction)
helperSimilarity <- function(newData, refData, label) {
cat("\n\nChecking for similarity of:", label)
cat("\nIn reference but not in current:", setdiff(refData, newData))
cat("\nIn current but not in reference:", setdiff(newData, refData))
}
# Check for similarity of fields, dates, and states
cat("\n*** COMPARISONS TO REFERENCE FILE:", deparse(substitute(checkFile)))
helperSimilarity(newData=names(df), refData=names(checkFile), label="column names")
helperSimilarity(newData=df %>% pull(state) %>% unique(),
refData=checkFile %>% pull(state) %>% unique() ,
label="states"
)
# Dates are formatted as strings so the setdiff() output prints readably
helperSimilarity(newData=df %>% pull(date) %>% unique() %>% format("%Y-%m-%d"),
refData=checkFile %>% pull(date) %>% unique() %>% format("%Y-%m-%d"),
label="dates"
)
# Check for overall control totals in new file
cat("\n\nOverall control totals in file:\n")
df %>%
summarize_at(vars(all_of(controlFields)), .funs=sum, na.rm=TRUE) %>%
print()
# Check for similarity of control totals by date in files
dfByDate <- df %>%
group_by(date) %>%
summarize_at(vars(all_of(controlFields)), .funs=sum, na.rm=TRUE) %>%
ungroup() %>%
pivot_longer(-date, values_to="newValue")
checkByDate <- checkFile %>%
group_by(date) %>%
summarize_at(vars(all_of(controlFields)), .funs=sum, na.rm=TRUE) %>%
ungroup() %>%
pivot_longer(-date, values_to="oldValue")
# Flag date-metric combinations differing both absolutely (>=5) and relatively (>=1%)
cat("\n\n*** Difference of at least 5 and difference is at least 1%:\n\n")
dfByDate %>%
inner_join(checkByDate) %>%
filter(abs(newValue-oldValue)>=5,
pmax(newValue, oldValue)>=1.01*pmin(newValue, oldValue)
) %>%
as.data.frame() %>%
print()
p1 <- dfByDate %>%
inner_join(checkByDate) %>%
pivot_longer(-c(date, name), names_to="newOld") %>%
ggplot(aes(x=date, y=value, group=newOld, color=newOld)) +
geom_line() +
facet_wrap(~name, nrow=1, scales="free_y") +
labs(title="Control totals by date for new and reference file", x="", y="Summed Value")
print(p1)
# Check for similarity of control totals by controlBy in files
# semi_join() restricts the new file to dates present in checkFile so the
# by-group totals are compared over the same time span
dfByControl <- df %>%
semi_join(select(checkFile, date), by="date") %>%
group_by_at(vars(all_of(controlBy))) %>%
summarize_at(vars(all_of(controlFields)), .funs=sum, na.rm=TRUE) %>%
ungroup() %>%
pivot_longer(-all_of(controlBy), values_to="newValue")
checkByControl <- checkFile %>%
group_by_at(vars(all_of(controlBy))) %>%
summarize_at(vars(all_of(controlFields)), .funs=sum, na.rm=TRUE) %>%
ungroup() %>%
pivot_longer(-all_of(controlBy), values_to="oldValue")
cat("\n\n*** Difference of at least 5 and difference is at least 1%:\n\n")
dfByControl %>%
inner_join(checkByControl) %>%
filter(abs(newValue-oldValue)>=5,
pmax(newValue, oldValue)>=1.01*pmin(newValue, oldValue)
) %>%
as.data.frame() %>%
print()
# Return the data file
df
}
The file can then be read and sanity checked:
dfRaw_20200830 <- readCOViDbyState("./RInputFiles/Coronavirus/CV_downloaded_200830.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## state = col_character(),
## dataQualityGrade = col_character(),
## lastUpdateEt = col_character(),
## dateModified = col_datetime(format = ""),
## checkTimeEt = col_character(),
## dateChecked = col_datetime(format = ""),
## fips = col_character(),
## totalTestResultsSource = col_character(),
## hash = col_character(),
## grade = col_logical()
## )
## See spec(...) for full column specifications.
##
## File is unique by state and date
##
## *** COMPARISONS TO REFERENCE FILE: cvData
##
## Checkin for similarity of: column names
## In reference but not in current:
## In current but not in reference: totalTestResultsSource
##
## Checkin for similarity of: states
## In reference but not in current:
## In current but not in reference:
##
## Checkin for similarity of: dates
## In reference but not in current:
## In current but not in reference: 2020-08-29 2020-08-28 2020-08-27 2020-08-26 2020-08-25 2020-08-24 2020-08-23 2020-08-22 2020-08-21
##
## Overall control totals in file:
## # A tibble: 1 x 3
## positiveIncrease deathIncrease hospitalizedCurrently
## <dbl> <dbl> <dbl>
## 1 5928267 174768 6833225
##
##
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("date", "name")
## date name newValue oldValue
## 1 2020-02-15 positiveIncrease 0 7
## 2 2020-02-16 positiveIncrease 0 7
## 3 2020-02-17 positiveIncrease 0 15
## 4 2020-02-18 positiveIncrease 0 9
## 5 2020-02-19 positiveIncrease 0 10
## 6 2020-02-20 positiveIncrease 0 13
## 7 2020-02-21 positiveIncrease 0 11
## 8 2020-02-22 positiveIncrease 0 13
## 9 2020-02-23 positiveIncrease 0 16
## 10 2020-02-24 positiveIncrease 0 26
## 11 2020-02-25 positiveIncrease 0 31
## 12 2020-02-26 positiveIncrease 0 29
## 13 2020-02-27 positiveIncrease 0 27
## 14 2020-02-28 positiveIncrease 0 40
## 15 2020-02-29 positiveIncrease 18 24
## 16 2020-03-01 positiveIncrease 16 78
## 17 2020-03-02 positiveIncrease 40 82
## 18 2020-03-03 positiveIncrease 41 100
## 19 2020-03-04 positiveIncrease 56 110
## 20 2020-03-05 positiveIncrease 97 151
## 21 2020-03-06 positiveIncrease 94 137
## 22 2020-03-07 positiveIncrease 156 217
## 23 2020-03-08 positiveIncrease 175 267
## 24 2020-03-09 positiveIncrease 256 367
## 25 2020-03-10 positiveIncrease 335 441
## 26 2020-03-11 positiveIncrease 449 527
## 27 2020-03-12 positiveIncrease 597 674
## 28 2020-03-13 positiveIncrease 945 1025
## 29 2020-03-14 positiveIncrease 805 915
## 30 2020-03-15 positiveIncrease 1105 1251
## 31 2020-03-16 positiveIncrease 1491 1560
## 32 2020-03-17 positiveIncrease 3552 3613
## 33 2020-03-18 positiveIncrease 3082 3171
## 34 2020-03-20 positiveIncrease 6153 6255
## 35 2020-03-21 positiveIncrease 6793 6885
## 36 2020-03-22 positiveIncrease 9161 9259
## 37 2020-06-08 positiveIncrease 17209 17012
## 38 2020-06-19 positiveIncrease 31472 31046
## 39 2020-06-21 positiveIncrease 27928 27284
## 40 2020-06-23 positiveIncrease 33447 33021
## 41 2020-06-29 positiveIncrease 39811 39175
## 42 2020-07-26 positiveIncrease 61009 61713
## 43 2020-08-02 positiveIncrease 46756 48266
## 44 2020-08-09 positiveIncrease 50624 51365
## 45 2020-08-16 positiveIncrease 42487 43083
## 46 2020-08-20 positiveIncrease 43758 43245
## 47 2020-08-20 deathIncrease 1134 1117
## Joining, by = c("date", "name")
##
##
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("state", "name")
## state name newValue oldValue
## 1 WA positiveIncrease 70973 68687
glimpse(dfRaw_20200830)
## Observations: 9,953
## Variables: 54
## $ date <date> 2020-08-29, 2020-08-29, 2020-08-29, 20...
## $ state <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO...
## $ positive <dbl> 6035, 123889, 60378, 0, 201287, 693839,...
## $ negative <dbl> 339660, 851929, 646592, 1514, 991089, 1...
## $ pending <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ hospitalizedCurrently <dbl> 43, 986, 407, NA, 812, 5063, 225, 60, 7...
## $ hospitalizedCumulative <dbl> NA, 14267, 4142, NA, 21433, NA, 6945, 1...
## $ inIcuCurrently <dbl> NA, NA, NA, NA, 262, 1411, NA, NA, 25, ...
## $ inIcuCumulative <dbl> NA, 1459, NA, NA, NA, NA, NA, NA, NA, N...
## $ onVentilatorCurrently <dbl> 7, NA, 95, NA, 164, NA, NA, NA, 13, NA,...
## $ onVentilatorCumulative <dbl> NA, 801, 538, NA, NA, NA, NA, NA, NA, N...
## $ recovered <dbl> 2201, 48028, 54133, NA, 30331, NA, 5921...
## $ dataQualityGrade <chr> "A", "B", "A", "C", "A+", "B", "A+", "B...
## $ lastUpdateEt <chr> "8/29/2020 03:59", "8/29/2020 11:00", "...
## $ dateModified <dttm> 2020-08-29 03:59:00, 2020-08-29 11:00:...
## $ checkTimeEt <chr> "08/28 23:59", "08/29 07:00", "08/28 20...
## $ death <dbl> 37, 2152, 772, 0, 5007, 12834, 1843, 44...
## $ hospitalized <dbl> NA, 14267, 4142, NA, 21433, NA, 6945, 1...
## $ dateChecked <dttm> 2020-08-29 03:59:00, 2020-08-29 11:00:...
## $ totalTestsViral <dbl> 345695, 967213, 706970, NA, 1190668, 11...
## $ positiveTestsViral <dbl> 5558, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ negativeTestsViral <dbl> 339808, NA, 646592, NA, NA, NA, NA, NA,...
## $ positiveCasesViral <dbl> 6035, 115284, 60378, 0, 199579, 693839,...
## $ deathConfirmed <dbl> 37, 2059, NA, NA, 4738, NA, NA, 3579, N...
## $ deathProbable <dbl> NA, 93, NA, NA, 269, NA, NA, 886, NA, 7...
## $ totalTestEncountersViral <dbl> NA, NA, NA, NA, NA, NA, 985622, NA, 285...
## $ totalTestsPeopleViral <dbl> NA, NA, NA, 1514, NA, NA, 695569, NA, N...
## $ totalTestsAntibody <dbl> NA, NA, NA, NA, 265542, NA, 156499, NA,...
## $ positiveTestsAntibody <dbl> NA, NA, NA, NA, NA, NA, 10454, NA, NA, ...
## $ negativeTestsAntibody <dbl> NA, NA, NA, NA, NA, NA, 141188, NA, NA,...
## $ totalTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ negativeTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsPeopleAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsAntigen <dbl> NA, NA, 3610, NA, NA, NA, NA, NA, NA, N...
## $ positiveTestsAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ fips <chr> "02", "01", "05", "60", "04", "06", "08...
## $ positiveIncrease <dbl> 112, 1704, 795, 0, 629, 4981, 430, 0, 7...
## $ negativeIncrease <dbl> 6045, 9251, 8120, 0, 7670, 94012, 7643,...
## $ total <dbl> 345695, 975818, 706970, 1514, 1192376, ...
## $ totalTestResultsSource <chr> "posNeg", "posNeg", "posNeg", "posNeg",...
## $ totalTestResults <dbl> 345695, 975818, 706970, 1514, 1192376, ...
## $ totalTestResultsIncrease <dbl> 6157, 10955, 8915, 0, 8299, 98993, 1263...
## $ posNeg <dbl> 345695, 975818, 706970, 1514, 1192376, ...
## $ deathIncrease <dbl> 0, 45, 16, 0, 29, 144, 8, 0, 0, 0, 150,...
## $ hospitalizedIncrease <dbl> 0, 0, 0, 0, 5, 0, 17, 0, 0, 0, 287, 198...
## $ hash <chr> "d14280fc719ed7f9365e0e046b0ac9ffd9a0b0...
## $ commercialScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeRegularScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ positiveScore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ score <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ grade <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
There appear to be multiple modest restatements of the data, but the overall trends by date and state are substantially the same as in the previous file.
Next steps are to use existing segments and to track their evolution against the newer data.
The data can be run against the filtering functions, with only key variables maintained:
# Variables to be kept
varsFilter <- c("date", "state", "positiveIncrease", "deathIncrease", "hospitalizedCurrently")
# Keep the cases, deaths, and hospitalized currently data for the 50 states and DC
# processCVData is the project filtering helper; varsRename maps raw column
# names to the short names (cases/deaths/hosp) used downstream
dfFiltered_20200830 <- processCVData(dfRaw_20200830,
varsKeep=varsFilter,
varsRename=c(positiveIncrease="cases",
deathIncrease="deaths",
hospitalizedCurrently="hosp"
)
)
##
##
## Control totals - note that validState other than TRUE will be discarded
##
## # A tibble: 2 x 5
## validState cases deaths hosp n
## <lgl> <dbl> <dbl> <dbl> <dbl>
## 1 FALSE 35062 454 NA 835
## 2 TRUE 5893205 174314 NA 9118
# Show a sample of the filtered file
glimpse(dfFiltered_20200830)
## Observations: 9,118
## Variables: 5
## $ date <date> 2020-08-29, 2020-08-29, 2020-08-29, 2020-08-29, 2020-08-29,...
## $ state <chr> "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", ...
## $ cases <dbl> 112, 1704, 795, 629, 4981, 430, 0, 74, 266, 3197, 2386, 264,...
## $ deaths <dbl> 0, 45, 16, 29, 144, 8, 0, 0, 0, 150, 105, 4, 13, 10, 11, 8, ...
## $ hosp <dbl> 43, 986, 407, 812, 5063, 225, 60, 76, 61, 3799, 2582, 279, 3...
The function for making per capita variables is updated to allow for other fields to be included:
# Function to add per capita and rolling to the base data frame
# Updated function to take an arbitrary number of variables and convert them
helperMakePerCapita <- function(df,
mapVars=c("cases"="cpm", "deaths"="dpm"),
k=7
) {
# FUNCTION ARGUMENTS:
# df: the initial data frame for conversion
# mapVars: named vector of variables to be converted 'original name'='converted name'
# k: the rolling time period to use
#
# RETURNS: df with one per capita column and one rolling-k column per entry in mapVars
# Add a per capita version of each requested variable by folding over the names
df <- purrr::reduce(names(mapVars),
function(acc, v) helperPerCapita(acc, origVar=v, newName=mapVars[v]),
.init=df
)
# Group by state and order by date so the rolling windows line up chronologically
df <- df %>%
group_by(state) %>%
arrange(date)
# Add a rolling k-period aggregate for each per capita variable
df <- purrr::reduce(unname(mapVars),
function(acc, v) helperRollingAgg(acc, origVar=v, newName=paste0(v, k), k=k),
.init=df
)
# Return the updated data frame, ungrouped
ungroup(df)
}
# Confirm that the function, with defaults, is identical to previous
identical(cvFilteredPerCapita, helperMakePerCapita(cvFiltered))
## [1] TRUE
The updated function can then be applied, with the hospital data also included:
# Create per capita and rolling 7 for cases, deaths, hosp
dfPerCapita_20200830 <- helperMakePerCapita(dfFiltered_20200830,
mapVars=c("cases"="cpm", "deaths"="dpm", "hosp"="hpm")
)
glimpse(dfPerCapita_20200830)
## Observations: 9,118
## Variables: 11
## $ date <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01-26,...
## $ state <chr> "WA", "WA", "WA", "WA", "WA", "WA", "WA", "WA", "WA", "WA", ...
## $ cases <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ deaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hosp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ cpm <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ cpm7 <dbl> NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm7 <dbl> NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm7 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
The data can then be applied to the segment assessment routine, using the clustVec (hierarchical with 5 segments after VT is collapsed to the NH segment) segments as an example:
# More generic version of varMapper that is date agnostic
varMapper <- c("cases"="Cases",
"newCases"="Increase in cases, most recent 30 days",
"casesroll7"="Rolling 7-day mean cases",
"deaths"="Deaths",
"newDeaths"="Increase in deaths, most recent 30 days",
"deathsroll7"="Rolling 7-day mean deaths",
"cpm"="Cases per million",
"cpm7"="Cases per day (7-day rolling mean) per million",
"newcpm"="Increase in cases, most recent 30 days, per million",
"dpm"="Deaths per million",
"dpm7"="Deaths per day (7-day rolling mean) per million",
"newdpm"="Increase in deaths, most recent 30 days, per million",
"hpm7"="Currently Hospitalized per million (7-day rolling mean)"
)
# Create the cluster assessments
plot_20200830_hier5 <- assessClusters(clustVec,
dfState=stateData,
dfBurden=dfPerCapita_20200830,
thruLabel="Aug 30, 2020",
plotsTogether=TRUE
)
##
## Recency is defined as 2020-07-31 through current
##
## Recency is defined as 2020-07-31 through current
# Create the hospitalized plot
dfHospital_20200830 <- plotHospitalized(rename(dfFiltered_20200830, hospitalizedCurrently=hosp),
clusterVector=clustVec,
subT="Data through August 30"
)
A function is then written to consolidate the key metrics (cases, deaths, hospitalizations) by cluster:
# Function to create plots of consolidated metrics (cases, deaths, hosp, ...) by cluster
plotConsolidatedMetrics <- function(dfMain,
                                    dfHosp=NULL,
                                    varMain=c("state", "cluster", "date", "pop", "cases", "deaths", "hosp"),
                                    varDropHosp=c("n", "pop"),
                                    joinBy=c("state", "cluster", "date"),
                                    subT=NULL,
                                    nrowPlot2=1
                                    ) {
  # FUNCTION ARGUMENTS:
  # df Main: the main file produced by assessClusters(), containing data by state-cluster-date
  # dfHosp: the separate file with hospital data (NULL means data already available in dfMain)
  # varMain: variables to keep from dfMain
  # varDropHosp: variables to drop from dfHosp
  # joinBy: variables for joining dfMain and dfHosp
  # subT: plot subtitle (NULL will use the default)
  # nrowPlot2: number of rows for the facetting to use on plot 2
  #
  # SIDE EFFECTS: prints three plots
  # RETURNS: the long-format plotting data (state rows plus 'cluster' pseudo-state rows)
  # Default subtitle clarifies that hosp is a census (currently in hospital), not an incidence count
  if (is.null(subT)) {
    subT <- "Cases: new cases, Deaths: new deaths, Hosp: total in hospital (not new)"
  }
  # Filter dfMain to include only variables in varMain
  dfMain <- dfMain %>%
    select_at(vars(all_of(varMain)))
  # Left join dfMain to dfHosp unless dfHosp is NULL
  if (!is.null(dfHosp)) {
    dfHosp <- dfHosp %>%
      select_at(vars(all_of(names(dfHosp)[!(names(dfHosp) %in% varDropHosp)])))
    # FIX: all_of() is a tidyselect helper and is not valid outside a selecting
    # context; left_join()'s `by` argument takes a plain character vector
    dfMain <- dfMain %>%
      left_join(dfHosp, by=joinBy)
  }
  # Check that variables state, cluster, date, pop are all available
  # (written as all(... %in% ...) rather than piping the logical vector into all(),
  # which depended on %-operator left-associativity)
  if (!all(c("state", "cluster", "date", "pop") %in% names(dfMain))) {
    stop("\nFunction requires variables state, cluster, date, and pop after processing\n")
  }
  # Create the relevant plotting data: pivot the metrics long, then duplicate every
  # row with state='cluster' so cluster-level totals aggregate alongside state detail;
  # vpm is the per-million rate and vpm7 its rolling aggregate (via helperRollingAgg)
  dfPlot <- dfMain %>%
    pivot_longer(-c(state, cluster, date, pop)) %>%
    filter(!is.na(value)) %>%
    rbind(mutate(., state="cluster")) %>%
    group_by_at(vars(all_of(c(joinBy, "name")))) %>%
    summarize(value=sum(value), pop=sum(pop)) %>%
    mutate(vpm=1000000*value/pop) %>%
    arrange(state, cluster, name, date) %>%
    group_by(state, cluster, name) %>%
    helperRollingAgg(origVar="vpm", newName="vpm7")
  # PLOT 1: facetted totals, metric (rows) by cluster (columns); cluster aggregate
  # drawn bold, member states drawn as faint lines
  p1 <- dfPlot %>%
    filter(!is.na(vpm7)) %>%
    ggplot(aes(x=date, y=vpm7)) +
    geom_line(data=~filter(., state=="cluster"), aes(group=cluster, color=cluster), lwd=1.5) +
    geom_line(data=~filter(., state!="cluster"), aes(group=state), alpha=0.25) +
    facet_grid(name ~ cluster, scales="free_y") +
    labs(x="",
         y="Rolling 7-day mean per million",
         title="Key metrics by cluster (7-day rolling mean per million)",
         subtitle=subT
         ) +
    scale_x_date(date_breaks="1 months", date_labels="%b") +
    theme(axis.text.x=element_text(angle=90))
  print(p1)
  # PLOT 2: all clusters overlaid, one facet per metric (cluster aggregates only)
  p2 <- dfPlot %>%
    filter(!is.na(vpm7)) %>%
    ggplot(aes(x=date, y=vpm7)) +
    geom_line(data=~filter(., state=="cluster"), aes(group=cluster, color=cluster), lwd=1.5) +
    facet_wrap(~ name, scales="free_y", nrow=nrowPlot2) +
    labs(x="",
         y="Rolling 7-day mean per million",
         title="Key metrics by cluster (7-day rolling mean per million)",
         subtitle=subT
         ) +
    scale_x_date(date_breaks="1 months", date_labels="%b") +
    theme(axis.text.x=element_text(angle=90))
  print(p2)
  # PLOT 3: all metrics overlaid, one facet per cluster; each cluster-metric series
  # rescaled so its own peak is 100%, making timing of peaks comparable
  p3 <- dfPlot %>%
    filter(!is.na(vpm7)) %>%
    group_by(state, cluster, name) %>%
    mutate(spm7=vpm7/max(vpm7)) %>%
    ggplot(aes(x=date, y=spm7)) +
    geom_line(data=~filter(., state=="cluster"), aes(group=name, color=name), lwd=1) +
    facet_wrap(~ cluster, scales="free_y") +
    labs(x="",
         y="% of Maximum",
         title="Key metrics by cluster (% of cluster maximum for variable)",
         subtitle=subT
         ) +
    scale_x_date(date_breaks="1 months", date_labels="%b") +
    scale_color_discrete("variable") +
    theme(axis.text.x=element_text(angle=90))
  print(p3)
  # Return the plotting data
  dfPlot
}
The function can then be run for the updated data:
# Create the relevant consolidated plots
consolidatedPlotData_20200830 <- plotConsolidatedMetrics(plot_20200830_hier5)
There is increasing evidence that the primary late-outbreak segment is meaningfully past the peak and on the downswing, with the decline in deaths lagging the decline in cases and hospitalizations by a month or so. The primary early-outbreak states are not seeing much rebound in hospitalization or death, even as there has been a spike in cases (potentially near or even past plateau) in one of the segments. There is some evidence of spiking (cases and hospitalizations perhaps having peaked a month ago, deaths perhaps at or near peak now) of the mid-southern and western state segment that has generally seen low death rates from coronavirus.
The Ethical Skeptic creates interesting analyses of coronavirus, driven among other things by a search for consilience. Among the points raised are that PCR testing (for coronavirus and in other areas) has a high false-positive rate. As such, spikes in positive cases can be driven both by spikes in disease prevalence and spikes in testing. There are also meaningful lags in data reporting that can influence the timing of reported spikes.
These factors could contribute to the observation that increases in hospitalizations and deaths sometimes, but not always, follow increases in positive test results. It may also contribute to the observation that deaths no longer spike anywhere near as high for a given spike in positive cases as they did in March-May.
Next steps are to explore the evolution of reported tests (positive and negative) by cluster as well as the adjustment methodology used in the TES consilience plots (attempting to normalize reported positive cases so that a number of cases reported in April and the same number of cases reported in August represent the same level of disease prevalence).
There are many variables related to testing available in the COVID Tracking Project files:
# Columns that contain 'test' or 'Test'
testNames <- names(dfRaw_20200830) %>% grep(x=., pattern="[Tt]est", value=TRUE)
testNames
## [1] "totalTestsViral" "positiveTestsViral"
## [3] "negativeTestsViral" "totalTestEncountersViral"
## [5] "totalTestsPeopleViral" "totalTestsAntibody"
## [7] "positiveTestsAntibody" "negativeTestsAntibody"
## [9] "totalTestsPeopleAntibody" "positiveTestsPeopleAntibody"
## [11] "negativeTestsPeopleAntibody" "totalTestsPeopleAntigen"
## [13] "positiveTestsPeopleAntigen" "totalTestsAntigen"
## [15] "positiveTestsAntigen" "totalTestResultsSource"
## [17] "totalTestResults" "totalTestResultsIncrease"
# Glimpse of the data
dfRaw_20200830 %>%
select_at(vars(all_of(c("state", "date", testNames)))) %>%
glimpse()
## Observations: 9,953
## Variables: 20
## $ state <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO...
## $ date <date> 2020-08-29, 2020-08-29, 2020-08-29, 20...
## $ totalTestsViral <dbl> 345695, 967213, 706970, NA, 1190668, 11...
## $ positiveTestsViral <dbl> 5558, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ negativeTestsViral <dbl> 339808, NA, 646592, NA, NA, NA, NA, NA,...
## $ totalTestEncountersViral <dbl> NA, NA, NA, NA, NA, NA, 985622, NA, 285...
## $ totalTestsPeopleViral <dbl> NA, NA, NA, 1514, NA, NA, 695569, NA, N...
## $ totalTestsAntibody <dbl> NA, NA, NA, NA, 265542, NA, 156499, NA,...
## $ positiveTestsAntibody <dbl> NA, NA, NA, NA, NA, NA, 10454, NA, NA, ...
## $ negativeTestsAntibody <dbl> NA, NA, NA, NA, NA, NA, 141188, NA, NA,...
## $ totalTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ negativeTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsPeopleAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsAntigen <dbl> NA, NA, 3610, NA, NA, NA, NA, NA, NA, N...
## $ positiveTestsAntigen <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestResultsSource <chr> "posNeg", "posNeg", "posNeg", "posNeg",...
## $ totalTestResults <dbl> 345695, 975818, 706970, 1514, 1192376, ...
## $ totalTestResultsIncrease <dbl> 6157, 10955, 8915, 0, 8299, 98993, 1263...
# Summaries by the totalTestResultsSource, totals by column
dfRaw_20200830 %>%
select_at(vars(all_of(testNames))) %>%
group_by_at("totalTestResultsSource") %>%
mutate(n=1) %>%
summarize_all(sum, na.rm=TRUE) %>%
pivot_longer(-totalTestResultsSource) %>%
filter(value > 0) %>%
ggplot(aes(x=fct_reorder(name, value), y=value)) +
geom_point() +
geom_text(aes(y=ifelse(name=="n", 15, 0.8)*value, label=scales::scientific(value)), hjust=1, size=3.5) +
facet_wrap(~totalTestResultsSource) +
coord_flip() +
scale_y_log10() +
labs(title="Sum across records by test type", x="", y="Tests (log10)")
Most of the tests are based on the source ‘posNeg’, and most are of type ‘totalTestsViral’.
The COVID Tracking Project API notes that the field ‘negative’ reports the total number of people with a negative test result. Since a person can test negative many times, this is not necessarily reflective of testing volume.
The ‘totalTestResults’ column (and therefore ‘totalTestResultsIncrease’) is heterogeneous data that cannot be easily compared across states. It is either ‘posNeg’ (sum of positive and negative) or ‘totalTestEncountersViral’.
The ‘totalTestsViral’ column is listed as being the total number of PCR tests. It may in a handful of cases include antigen tests.
The ‘totalTestEncountersViral’ column is listed as being the total number of people who underwent PCR testing in a given day (multiple tests to the same person on the same day counted as only 1).
Data availability for some of the key variables is assessed by month:
# Not NA data
notNATests <- dfRaw_20200830 %>%
filter(state %in% c(state.abb, "DC")) %>%
select_at(vars(all_of(c("state", "date", testNames)))) %>%
mutate(nState=1) %>%
group_by(date) %>%
summarize_if(is.numeric, .funs=function(x) { sum(!is.na(x))}) %>%
ungroup()
# Evolution of Not NA states by time
notNATests %>%
pivot_longer(-date) %>%
ggplot(aes(x=date,
y=value,
group=fct_rev(fct_reorder(name, value, .fun=max)),
color=fct_rev(fct_reorder(name, value, .fun=max))
)
) +
geom_line(lwd=1) +
geom_hline(yintercept=51, lty=2) +
labs(x="", y="Number of states with non-NA data", title="Evolution of data availability by metric") +
scale_color_discrete("") +
scale_x_date(date_breaks="1 months", date_labels="%m")
notNATests %>%
pivot_longer(-date) %>%
filter(date==as.Date("2020-08-29")) %>%
arrange(-value)
## # A tibble: 18 x 3
## date name value
## <date> <chr> <int>
## 1 2020-08-29 totalTestResults 51
## 2 2020-08-29 totalTestResultsIncrease 51
## 3 2020-08-29 nState 51
## 4 2020-08-29 totalTestsViral 36
## 5 2020-08-29 totalTestsPeopleViral 24
## 6 2020-08-29 totalTestsAntibody 15
## 7 2020-08-29 negativeTestsViral 13
## 8 2020-08-29 positiveTestsViral 12
## 9 2020-08-29 positiveTestsAntibody 9
## 10 2020-08-29 negativeTestsAntibody 7
## 11 2020-08-29 totalTestEncountersViral 6
## 12 2020-08-29 totalTestsPeopleAntibody 5
## 13 2020-08-29 totalTestsAntigen 4
## 14 2020-08-29 positiveTestsPeopleAntibody 3
## 15 2020-08-29 negativeTestsPeopleAntibody 3
## 16 2020-08-29 totalTestsPeopleAntigen 2
## 17 2020-08-29 positiveTestsPeopleAntigen 2
## 18 2020-08-29 positiveTestsAntigen 1
Data availability varies significantly, with only the totalTestResults and totalTestResultsIncrease columns being generally available in all states on all dates.
Next, availability of data by state over time is assessed:
# Not NA data
notNADateStateTests <- dfRaw_20200830 %>%
filter(state %in% c(state.abb, "DC")) %>%
select_at(vars(all_of(c("state", "date", testNames)))) %>%
mutate(nState=1, month=lubridate::month(date)) %>%
group_by(month, state) %>%
summarize_if(is.numeric, .funs=function(x) { min(!is.na(x))}) %>%
ungroup()
# Evolution of Not NA states by month
notNADateStateTests %>%
pivot_longer(-c(state, month)) %>%
filter(!(name %in% c("nState"))) %>%
ggplot(aes(y=fct_reorder(state, value, .fun=sum), x=month)) +
geom_tile(aes(fill=value)) +
labs(x="", y="", title="Evolution of data availability by metric") +
scale_fill_continuous("", low="white", high="green") +
facet_wrap(~name, nrow=1)
So, while the field is heterogeneous, data availability may suggest use of ‘totalTestResultsIncrease’ as a general proxy for the testing volume occurring over time. The evolution is as follows:
# National testing volume over time: daily total across states, smoothed with a
# rolling 7-day mean via helperRollingAgg
dfRaw_20200830 %>%
  group_by(date) %>%
  summarize(tests=sum(totalTestResultsIncrease, na.rm=TRUE)) %>%
  ungroup() %>%
  arrange(date) %>%
  helperRollingAgg(origVar="tests", newName="tests7") %>%
  ggplot(aes(x=date, y=tests7)) +
  geom_line() +
  # FIX: y-axis label previously had an unbalanced parenthesis
  labs(x="Month", y="Daily tests (rolling 7-day mean)", title="Rolling 7-day mean tests per day") +
  scale_x_date(date_breaks="1 months", date_labels="%m")
## Warning: Removed 6 rows containing missing values (geom_path).
The metric can also be assessed on a per capita basis by segment:
# Per capita testing by segment: tests per million per day by state, smoothed to a
# rolling 7-day mean, then plotted total-vs-elements by cluster
dfRaw_20200830 %>%
  select(state, date, totalTestResultsIncrease) %>%
  inner_join(stateData, by="state") %>%
  mutate(cluster=factor(clustVec[state])) %>%
  mutate(tpm=1000000*totalTestResultsIncrease/pop) %>%
  group_by(state) %>%
  arrange(date) %>%
  helperRollingAgg(origVar="tpm", newName="tpm7") %>%
  ungroup() %>%
  helperTotalvsElements(keyVar="tpm7",
                        title="Evolution of tests by segment",
                        # FIX: label previously had an unbalanced parenthesis
                        mapper=c("tpm7"="Tests per million per day (7-day rolling mean)"),
                        facetScales="free_y"
                        )
Testing appears to have grown rapidly in all segments, though with a significant decline in testing in the “late spike” states that is associated with the same timing as the fall in cases in “late spike” states.
Most segments appear to be reporting 2000 tests per 1 million population. At a false positive rate of 1%, this would produce 20 false-positive tests per million. The New England and Mid-Atlantic states are reporting case-per-million numbers of around this magnitude, which is suggestive that they may be catching mostly false positives rather than disease prevalence. This would be consistent with the hospitalization and deaths data being extremely low currently in these segments.
Further exploration will focus on integrating testing data with the other key metrics (cases, hospitalizations, deaths) for the overall process.
The data are run through the main preparation and assessment functions:
# STEP 1: Filter the data so that it includes tests
varsFilter_002 <- c("date", "state", "positiveIncrease", "deathIncrease",
"hospitalizedCurrently", "totalTestResultsIncrease"
)
dfFiltered_20200830_002 <- processCVData(dfRaw_20200830,
varsKeep=varsFilter_002,
varsRename=c(positiveIncrease="cases",
deathIncrease="deaths",
hospitalizedCurrently="hosp",
totalTestResultsIncrease="tests"
)
)
##
##
## Control totals - note that validState other than TRUE will be discarded
##
## # A tibble: 2 x 6
## validState cases deaths hosp tests n
## <lgl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 FALSE 35062 454 NA 406829 835
## 2 TRUE 5893205 174314 NA 76441971 9118
glimpse(dfFiltered_20200830_002)
## Observations: 9,118
## Variables: 6
## $ date <date> 2020-08-29, 2020-08-29, 2020-08-29, 2020-08-29, 2020-08-29,...
## $ state <chr> "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", ...
## $ cases <dbl> 112, 1704, 795, 629, 4981, 430, 0, 74, 266, 3197, 2386, 264,...
## $ deaths <dbl> 0, 45, 16, 29, 144, 8, 0, 0, 0, 150, 105, 4, 13, 10, 11, 8, ...
## $ hosp <dbl> 43, 986, 407, 812, 5063, 225, 60, 76, 61, 3799, 2582, 279, 3...
## $ tests <dbl> 6157, 10955, 8915, 8299, 98993, 12632, 0, 4207, 1843, 27534,...
# STEP 2: Convert to per capita
dfPerCapita_20200830_002 <- helperMakePerCapita(dfFiltered_20200830_002,
mapVars=c("cases"="cpm", "deaths"="dpm",
"hosp"="hpm", "tests"="tpm"
)
)
glimpse(dfPerCapita_20200830_002)
## Observations: 9,118
## Variables: 14
## $ date <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01-26,...
## $ state <chr> "WA", "WA", "WA", "WA", "WA", "WA", "WA", "WA", "WA", "WA", ...
## $ cases <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ deaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hosp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tests <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ cpm <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ cpm7 <dbl> NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm7 <dbl> NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm7 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm7 <dbl> NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
# STEP 3: Create the cluster assessments with appropriate variable labels
varMapper <- c("cases"="Cases",
"newCases"="Increase in cases, most recent 30 days",
"casesroll7"="Rolling 7-day mean cases",
"deaths"="Deaths",
"newDeaths"="Increase in deaths, most recent 30 days",
"deathsroll7"="Rolling 7-day mean deaths",
"cpm"="Cases per million",
"cpm7"="Cases per day (7-day rolling mean) per million",
"newcpm"="Increase in cases, most recent 30 days, per million",
"dpm"="Deaths per million",
"dpm7"="Deaths per day (7-day rolling mean) per million",
"newdpm"="Increase in deaths, most recent 30 days, per million",
"hpm7"="Currently Hospitalized per million (7-day rolling mean)",
"tpm"="Tests per million",
"tpm7"="Tests per million per day (7-day rolling mean)"
)
# Create the cluster assessments
plot_20200830_hier5_002 <- assessClusters(clustVec,
dfState=stateData,
dfBurden=dfPerCapita_20200830_002,
thruLabel="Aug 30, 2020",
plotsTogether=TRUE
)
##
## Recency is defined as 2020-07-31 through current
##
## Recency is defined as 2020-07-31 through current
# STEP 4: Plot the consolidated metrics
subT <- "Cases: new cases, Deaths: new deaths, Hosp: total in hospital (not new), Tests: new tests"
consolidatedPlotData_20200830_002 <- plotConsolidatedMetrics(plot_20200830_hier5_002,
varMain=c("state", "cluster", "date", "pop",
"cases", "deaths", "hosp", "tests"
),
subT=subT,
nrowPlot2=2
)
The consolidated data can then be used to assess the positive rate for any given state and date:
# Create percent positive by state-date
consPositive <- consolidatedPlotData_20200830_002 %>%
ungroup() %>%
select(state, cluster, date, name, vpm7) %>%
arrange(state, cluster, date, name) %>%
pivot_wider(-vpm7, names_from="name", values_from="vpm7") %>%
mutate(pctpos=cases/tests) %>%
pivot_longer(-c(state, cluster, date), values_to="vpm7") %>%
filter(!is.na(vpm7))
# Plot at segment level
consPositive %>%
filter(name=="pctpos", state=="cluster") %>%
ggplot(aes(x=date, y=vpm7)) +
geom_line(aes(group=cluster, color=cluster), lwd=1) +
scale_x_date(date_breaks="1 months", date_labels="%m") +
labs(x="Month", y="Percent positive (rolling 7-day mean)", title="Positive test rates by cluster") +
geom_hline(yintercept = 0.01, lty=2, lwd=0.5)
# Plot at state level, facetted by segment
consPositive %>%
filter(name=="pctpos", vpm7 <= 1, vpm7 >= 0) %>%
ggplot(aes(x=date, y=vpm7)) +
geom_line(data=~filter(., state=="cluster"), aes(group=cluster, color=cluster), lwd=1.5) +
geom_line(data=~filter(., state!="cluster"), aes(group=state), alpha=0.25) +
scale_x_date(date_breaks="1 months", date_labels="%m") +
labs(x="Month", y="Percent positive (rolling 7-day mean)",
title="Positive test rates by state and cluster"
) +
geom_hline(yintercept = 0.01, lty=2, lwd=0.5) +
facet_wrap(~cluster)
The segment that was hit hardest early is down around a 1% test positive rate (dotted line on the plots) on a large volume of tests. This is in the range that literature suggests as the false-positive rate for PCR testing. The segment that was hit hardest late still has test positive rates over 10%, suggestive that there is still meaningful disease being diagnosed.
Of note, the outbreaks that hit early were associated with sustained test positive rates in the 30%+ range while the other states have generally been under 20% test positive rates. This is consistent with the observation that there are far fewer hospitalizations and deaths per capita in the late segment, even as the total number of cases in the late segment surpasses those of the early segments.
Cumulative metrics can be of interest also. There is no solid metric for cumulative hospitalizations, but the cases, deaths, and tests data can be converted to cumulative:
# Create the cumulative data
clusterCumulative <- consPositive %>%
filter(name %in% c("cases", "deaths", "tests"), !is.na(vpm7)) %>%
arrange(state, cluster, name, date) %>%
group_by(state, cluster, name) %>%
mutate(cum7=cumsum(vpm7)) %>%
ungroup()
# Plot the cumulative data by cluster
clusterCumulative %>%
filter(state=="cluster") %>%
ggplot(aes(x=date, y=cum7)) +
geom_line(aes(group=cluster, color=cluster)) +
facet_wrap(~name, nrow=1, scales="free_y") +
scale_x_date(date_breaks="1 months", date_labels="%m") +
labs(x="Month", y="Cumulative Burden (per million)",
title="Cumulative burden by segment (per million)"
)
# Find top-5 states in either total deaths per capita or increase in deaths per capita in the past 30 days
stateRanks <- clusterCumulative %>%
filter(name=="deaths", state!="cluster") %>%
select(state, date, cum7) %>%
group_by(state) %>%
summarize(max7=max(cum7), tminus30=sum(ifelse(date==max(date)-lubridate::days(30), cum7, 0))) %>%
ungroup() %>%
mutate(growth=max7-tminus30, rkTotal=min_rank(-max7), rkGrowth=min_rank(-growth),
flag=ifelse(pmin(rkTotal, rkGrowth)<=5, 1, 0)
) %>%
arrange(-flag, rkTotal)
flagStates <- stateRanks %>%
filter(flag==1) %>%
pull(state)
# Plot the cumulative deaths data by state
# Bold for 5-highest total and 5-highest 1-month increase
clusterCumulative %>%
filter(state!="cluster", name=="deaths") %>%
mutate(bold=ifelse(state %in% flagStates, 1, 0)) %>%
ggplot(aes(x=date, y=cum7)) +
geom_line(aes(group=state, color=cluster, alpha=0.4+0.6*bold, size=0.5+0.5*bold)) +
geom_text(data=~filter(., bold==1, date==max(date)),
aes(x=date+lubridate::days(10), label=paste0(state, ": ", round(cum7, 0)), color=cluster),
size=3, fontface="bold"
) +
scale_x_date(date_breaks="1 months", date_labels="%m") +
scale_alpha_identity() +
scale_size_identity() +
labs(x="Month", y="Cumulative Deaths (per million)",
title="Cumulative coronavirus deaths by state (per million)",
subtitle="Top 5 states for total deaths and death increase in past 30 days flagged"
)
Louisiana and Mississippi stand out as having more sustained outbreaks than the other top-death states. Most of the other states with high deaths had a sharper slope change associated around the time of their main outbreak. This is consistent with MS and (especially) LA tending to be poorly associated with the trends of the other states in their cluster.
The above approach is converted to a function so it can be repeated:
# Function to convert a metrics file to cumulative totals
makeCumulative <- function(df,
                           typeVar="name",
                           typeKeep=c("cases", "deaths", "tests"),
                           valVar="vpm7",
                           groupVars=c("state", "cluster", "name"),
                           arrangeVars=c("date"),
                           newName="cum7"
                           ) {
  # FUNCTION ARGUMENTS:
  # df: data frame containing the metrics
  # typeVar: the variable holding the metric type (default is 'name')
  # typeKeep: the values of typeVar to be kept
  # valVar: the variable holding the metric value (default is 'vpm7')
  # groupVars: groups for calculating cumulative sum
  # arrangeVars: variables to be sorted by before calculating cumulative sum
  # newName: the name for the new variable
  #
  # RETURNS: df restricted to typeKeep rows with non-missing valVar, plus a new
  # column <newName> holding the running total of valVar within each groupVars group
  df %>%
    # Keep only the requested metric types, then drop rows with missing values
    filter(.data[[typeVar]] %in% typeKeep) %>%
    filter(!is.na(.data[[valVar]])) %>%
    # Sort so the cumulative sum accumulates in the intended (e.g., date) order
    arrange_at(vars(all_of(c(groupVars, arrangeVars)))) %>%
    group_by_at(groupVars) %>%
    mutate(!!newName := cumsum(.data[[valVar]])) %>%
    ungroup()
}
clCum <- makeCumulative(consPositive)
identical(clCum, clusterCumulative)
## [1] TRUE
As expected, the function produces the same results as the stand-alone code.
Next, a function is written to identify the states to flag:
# Function to find and flag states that are high on a key value or change in key value
findFlagStates <- function(df,
                           keyMetricVal,
                           keyMetricVar="name",
                           cumVar="cum7",
                           prevDays=30,
                           nFlag=5
                           ) {
  # FUNCTION ARGUMENTS:
  # df: the data frame containing the cumulative data
  # keyMetricVal: the metric of interest (e.g., "deaths", "cases", "tests")
  # keyMetricVar: the variable name for the column containing the metric of interest
  # cumVar: variable containing the cumulative totals
  # prevDays: the number of days previous to use for defining growth
  # nFlag: the number of states to flag for either total or growth (top-n of each)
  #
  # RETURNS: character vector of flagged states, ordered by rank on total
  # Summarize each state: its cumulative maximum, and its cumulative value exactly
  # prevDays before its latest date (NOTE(review): if that earlier date is absent
  # for a state, tminus is 0 and growth equals the total -- confirm acceptable)
  stateSummary <- df %>%
    filter(.data[[keyMetricVar]]==keyMetricVal) %>%
    filter(state!="cluster") %>%
    select_at(vars(all_of(c("state", "date", cumVar)))) %>%
    group_by(state) %>%
    summarize(maxVal=max(.data[[cumVar]]),
              tminus=sum(ifelse(date==max(date)-lubridate::days(prevDays), .data[[cumVar]], 0))
              ) %>%
    ungroup()
  # Rank states on total and on recent growth, keep those in the top nFlag of
  # either ranking, and return them ordered by total rank
  stateSummary %>%
    mutate(growth=maxVal-tminus) %>%
    mutate(rkTotal=min_rank(-maxVal), rkGrowth=min_rank(-growth)) %>%
    filter(rkTotal<=nFlag | rkGrowth<=nFlag) %>%
    arrange(rkTotal) %>%
    pull(state)
}
identical(flagStates, findFlagStates(clCum, keyMetricVal = "deaths"))
## [1] TRUE
As expected, the function flags the same states when applied using deaths.
Lastly, a function is written to make the plots, once by cluster and once by state with flagging:
# Function to plot cumulative data: optionally (1) cluster-level totals facetted by
# metric, and (2) state-level totals for one metric with flagged states bolded/labelled
plotCumulativeData <- function(df,
keyMetricp2,
flagsp2,
p2Desc=keyMetricp2,
keyVar="cum7",
makep1=FALSE,
makep2=TRUE
) {
# FUNCTION ARGUMENTS:
# df: the data frame of cumulative data (state rows plus 'cluster' pseudo-state rows)
# keyMetricp2: the key metric to be plotted in the second plot (e.g., "deaths", "cases", "tests")
# flagsp2: states to be treated as flagged (bolded and labelled) in the second plot
# p2Desc: the description to be used in plot 2 (defaults to the metric name)
# keyVar: the key variable to plot
# makep1: boolean, whether to make the first plot
# makep2: boolean, whether to make the second plot
#
# SIDE EFFECTS: prints the requested plot(s); no value is returned
# Plot the cumulative data by cluster (if flag is set for producing this)
if (isTRUE(makep1)) {
p1 <- df %>%
filter(state=="cluster") %>%
ggplot(aes(x=date, y=get(keyVar))) +
geom_line(aes(group=cluster, color=cluster)) +
facet_wrap(~name, nrow=1, scales="free_y") +
scale_x_date(date_breaks="1 months", date_labels="%m") +
labs(x="Month",
y="Cumulative Burden (per million)",
title="Cumulative burden by segment (per million)"
)
print(p1)
}
# Plot the cumulative totals over time for one metric, and flag the highest
if (isTRUE(makep2)) {
p2 <- df %>%
filter(state!="cluster", name==keyMetricp2) %>%
# bold is numeric (1/0) so it can feed the alpha/size arithmetic below
mutate(bold=ifelse(state %in% flagsp2, 1, 0)) %>%
ggplot(aes(x=date, y=get(keyVar))) +
# flagged states drawn fully opaque and thicker; others faint and thin
geom_line(aes(group=state, color=cluster, alpha=0.4+0.6*bold, size=0.5+0.5*bold)) +
# label each flagged state at its final point, offset right to avoid the line end
geom_text(data=~filter(., bold==1, date==max(date)),
aes(x=date+lubridate::days(10),
label=paste0(state, ": ", round(get(keyVar), 0)),
color=cluster
),
size=3,
fontface="bold"
) +
scale_x_date(date_breaks="1 months", date_labels="%m") +
# identity scales: use the computed alpha/size values directly, no legend
scale_alpha_identity() +
scale_size_identity() +
labs(x="Month",
y=paste0("Cumulative ", p2Desc, " (per million)"),
title=paste0("Cumulative coronavirus ", p2Desc, " by state (per million)"),
# NOTE(review): subtitle hardcodes "Top 5" even though flagsp2 may come from a
# findFlagStates() call with a different nFlag -- confirm or parameterize
subtitle="Top 5 states for total and growth rate are bolded and labelled"
)
print(p2)
}
}
# Facetted plot
plotCumulativeData(clCum, keyMetricp2="", flagsp2="", makep1=TRUE, makep2=FALSE)
# Plots by state for death, cases, tests
plotCumulativeData(clCum,
keyMetricp2="deaths",
flagsp2=findFlagStates(clCum, keyMetricVal = "deaths")
)
plotCumulativeData(clCum,
keyMetricp2="cases",
flagsp2=findFlagStates(clCum, keyMetricVal = "cases")
)
plotCumulativeData(clCum,
keyMetricp2="tests",
flagsp2=findFlagStates(clCum, keyMetricVal = "tests")
)
Results are generally as expected.
The site USA Facts makes data available for coronavirus cases and deaths by county. Data are provided with one row per county and one column for each date. As of September 1, the site reports 6.02 million cases and 183 thousand deaths, roughly aligned with the 5.93 million cases and 175 thousand deaths as of August 29 in the COVID Tracking Project data. Counting cases and deaths is an inexact science and differences of ~5% are not uncommon based on different standards for counting a “case” or a “death” due to coronavirus.
The data are read in:
# File names
caseFile <- "./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20200903.csv"
deathFile <- "./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20200903.csv"
popFile <- "./RInputFiles/Coronavirus/covid_county_population_usafacts.csv"
# Cases file
rawCases_20200903 <- readr::read_csv(caseFile)
## Parsed with column specification:
## cols(
## .default = col_double(),
## `County Name` = col_character(),
## State = col_character()
## )
## See spec(...) for full column specifications.
glimpse(rawCases_20200903)
## Observations: 3,195
## Variables: 228
## $ countyFIPS <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20` <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20` <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20` <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20` <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20` <dbl> 0, 1, 4, 0, 0, 0, 0, 0, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20` <dbl> 0, 4, 4, 0, 0, 1, 0, 1, 2, 10, 1, 1, 0, 0, 1, 1, 0, 1...
## $ `3/26/20` <dbl> 0, 6, 5, 0, 0, 2, 2, 1, 2, 13, 1, 4, 1, 0, 1, 1, 0, 1...
## $ `3/27/20` <dbl> 0, 6, 5, 0, 0, 5, 2, 1, 3, 15, 1, 7, 1, 0, 1, 3, 0, 1...
## $ `3/28/20` <dbl> 0, 6, 10, 0, 0, 5, 2, 1, 3, 17, 1, 7, 1, 0, 2, 4, 0, ...
## $ `3/29/20` <dbl> 0, 6, 15, 0, 0, 5, 2, 1, 3, 27, 2, 8, 1, 0, 2, 5, 0, ...
## $ `3/30/20` <dbl> 0, 7, 18, 0, 2, 5, 2, 1, 9, 36, 2, 10, 2, 0, 2, 5, 0,...
## $ `3/31/20` <dbl> 0, 7, 19, 0, 3, 5, 2, 1, 9, 36, 2, 11, 3, 0, 2, 5, 0,...
## $ `4/1/20` <dbl> 0, 10, 23, 0, 3, 5, 2, 1, 11, 45, 2, 13, 4, 2, 3, 6, ...
## $ `4/2/20` <dbl> 0, 10, 25, 0, 4, 6, 2, 1, 12, 67, 4, 14, 4, 2, 7, 6, ...
## $ `4/3/20` <dbl> 0, 12, 28, 1, 4, 9, 2, 1, 20, 81, 5, 15, 4, 3, 8, 7, ...
## $ `4/4/20` <dbl> 0, 12, 29, 2, 4, 10, 2, 1, 21, 87, 6, 15, 4, 7, 9, 7,...
## $ `4/5/20` <dbl> 0, 12, 34, 2, 7, 10, 2, 1, 24, 90, 6, 18, 5, 9, 9, 7,...
## $ `4/6/20` <dbl> 0, 12, 38, 3, 7, 10, 2, 1, 38, 96, 6, 20, 6, 9, 9, 9,...
## $ `4/7/20` <dbl> 0, 12, 42, 3, 8, 10, 2, 2, 48, 102, 6, 20, 6, 10, 9, ...
## $ `4/8/20` <dbl> 0, 12, 49, 3, 9, 10, 3, 3, 52, 140, 7, 22, 6, 10, 11,...
## $ `4/9/20` <dbl> 0, 17, 59, 7, 11, 11, 4, 3, 54, 161, 7, 25, 6, 13, 11...
## $ `4/10/20` <dbl> 0, 17, 59, 9, 11, 12, 4, 3, 54, 171, 7, 27, 7, 13, 11...
## $ `4/11/20` <dbl> 0, 19, 66, 10, 13, 12, 4, 6, 57, 184, 7, 30, 9, 15, 1...
## $ `4/12/20` <dbl> 0, 19, 71, 10, 16, 13, 4, 7, 60, 200, 9, 30, 10, 19, ...
## $ `4/13/20` <dbl> 0, 19, 78, 10, 17, 15, 6, 8, 61, 212, 9, 33, 10, 19, ...
## $ `4/14/20` <dbl> 0, 23, 87, 11, 17, 16, 8, 8, 62, 216, 9, 33, 12, 21, ...
## $ `4/15/20` <dbl> 0, 25, 98, 13, 19, 17, 8, 11, 62, 227, 10, 37, 13, 22...
## $ `4/16/20` <dbl> 0, 25, 102, 14, 23, 18, 8, 11, 63, 234, 11, 37, 13, 2...
## $ `4/17/20` <dbl> 0, 25, 103, 15, 23, 20, 8, 13, 63, 236, 12, 37, 13, 2...
## $ `4/18/20` <dbl> 0, 25, 109, 18, 26, 20, 9, 13, 66, 240, 12, 39, 14, 2...
## $ `4/19/20` <dbl> 0, 27, 114, 20, 28, 21, 9, 14, 72, 246, 12, 42, 14, 2...
## $ `4/20/20` <dbl> 0, 28, 117, 22, 32, 22, 11, 14, 80, 257, 12, 43, 17, ...
## $ `4/21/20` <dbl> 0, 30, 123, 28, 32, 26, 11, 15, 83, 259, 12, 44, 18, ...
## $ `4/22/20` <dbl> 0, 32, 132, 29, 33, 29, 11, 17, 85, 270, 12, 46, 21, ...
## $ `4/23/20` <dbl> 0, 33, 143, 30, 33, 31, 12, 19, 88, 275, 12, 47, 22, ...
## $ `4/24/20` <dbl> 0, 36, 147, 32, 34, 31, 12, 21, 89, 282, 12, 49, 25, ...
## $ `4/25/20` <dbl> 0, 37, 154, 33, 35, 31, 12, 28, 90, 284, 12, 49, 27, ...
## $ `4/26/20` <dbl> 0, 37, 161, 33, 38, 34, 12, 32, 90, 285, 14, 51, 32, ...
## $ `4/27/20` <dbl> 0, 39, 168, 35, 42, 34, 12, 34, 90, 289, 14, 51, 39, ...
## $ `4/28/20` <dbl> 0, 40, 171, 37, 42, 34, 12, 45, 92, 290, 15, 52, 39, ...
## $ `4/29/20` <dbl> 0, 42, 173, 37, 42, 36, 12, 51, 93, 290, 15, 52, 39, ...
## $ `4/30/20` <dbl> 0, 42, 174, 39, 42, 37, 13, 53, 93, 290, 15, 52, 43, ...
## $ `5/1/20` <dbl> 0, 42, 175, 42, 42, 39, 14, 65, 93, 290, 15, 52, 49, ...
## $ `5/2/20` <dbl> 0, 45, 181, 43, 42, 40, 14, 92, 98, 294, 15, 54, 49, ...
## $ `5/3/20` <dbl> 0, 48, 187, 45, 42, 40, 14, 105, 105, 300, 16, 57, 49...
## $ `5/4/20` <dbl> 0, 53, 188, 45, 42, 40, 16, 114, 105, 302, 16, 58, 51...
## $ `5/5/20` <dbl> 0, 53, 189, 47, 43, 40, 18, 120, 114, 304, 17, 60, 54...
## $ `5/6/20` <dbl> 0, 58, 196, 47, 43, 42, 18, 130, 114, 306, 18, 61, 54...
## $ `5/7/20` <dbl> 0, 61, 205, 51, 44, 44, 18, 155, 120, 308, 18, 63, 56...
## $ `5/8/20` <dbl> 0, 67, 208, 53, 44, 44, 21, 162, 123, 311, 21, 63, 59...
## $ `5/9/20` <dbl> 0, 68, 216, 58, 45, 44, 22, 178, 124, 314, 22, 64, 61...
## $ `5/10/20` <dbl> 0, 74, 222, 59, 46, 44, 23, 189, 124, 316, 22, 65, 66...
## $ `5/11/20` <dbl> 0, 84, 224, 61, 46, 45, 26, 196, 125, 319, 24, 67, 67...
## $ `5/12/20` <dbl> 0, 91, 227, 67, 46, 45, 26, 224, 126, 324, 24, 69, 69...
## $ `5/13/20` <dbl> 0, 93, 231, 69, 46, 45, 28, 230, 127, 324, 24, 73, 72...
## $ `5/14/20` <dbl> 0, 103, 243, 74, 46, 45, 28, 249, 128, 326, 25, 74, 7...
## $ `5/15/20` <dbl> 0, 103, 244, 79, 49, 45, 32, 258, 129, 326, 26, 75, 8...
## $ `5/16/20` <dbl> 0, 110, 254, 79, 50, 45, 35, 271, 130, 328, 27, 77, 8...
## $ `5/17/20` <dbl> 0, 110, 254, 81, 50, 46, 35, 272, 130, 328, 27, 77, 8...
## $ `5/18/20` <dbl> 0, 120, 260, 85, 50, 47, 40, 285, 133, 329, 28, 79, 8...
## $ `5/19/20` <dbl> 0, 127, 262, 90, 51, 47, 52, 295, 133, 329, 29, 80, 9...
## $ `5/20/20` <dbl> 0, 136, 270, 96, 52, 47, 64, 312, 136, 330, 30, 83, 1...
## $ `5/21/20` <dbl> 0, 147, 270, 100, 52, 48, 71, 321, 136, 330, 31, 84, ...
## $ `5/22/20` <dbl> 0, 149, 271, 104, 55, 49, 89, 329, 137, 330, 33, 85, ...
## $ `5/23/20` <dbl> 0, 155, 273, 105, 58, 49, 105, 335, 138, 330, 33, 86,...
## $ `5/24/20` <dbl> 0, 159, 274, 110, 59, 49, 111, 344, 141, 336, 33, 87,...
## $ `5/25/20` <dbl> 0, 173, 276, 116, 62, 49, 141, 368, 147, 337, 33, 87,...
## $ `5/26/20` <dbl> 0, 189, 277, 122, 66, 51, 167, 380, 150, 338, 33, 90,...
## $ `5/27/20` <dbl> 0, 192, 281, 130, 71, 53, 176, 391, 152, 340, 33, 93,...
## $ `5/28/20` <dbl> 0, 205, 281, 132, 71, 58, 185, 392, 152, 349, 34, 97,...
## $ `5/29/20` <dbl> 0, 212, 282, 147, 71, 60, 201, 396, 153, 352, 36, 99,...
## $ `5/30/20` <dbl> 0, 216, 283, 150, 72, 61, 203, 402, 154, 353, 37, 100...
## $ `5/31/20` <dbl> 0, 220, 288, 164, 75, 62, 209, 410, 157, 355, 37, 100...
## $ `6/1/20` <dbl> 0, 233, 292, 172, 76, 63, 209, 414, 164, 358, 38, 103...
## $ `6/2/20` <dbl> 0, 238, 292, 175, 76, 63, 212, 416, 165, 358, 38, 104...
## $ `6/3/20` <dbl> 0, 239, 292, 177, 76, 63, 215, 419, 165, 359, 38, 105...
## $ `6/4/20` <dbl> 0, 241, 293, 177, 76, 63, 217, 421, 167, 360, 38, 107...
## $ `6/5/20` <dbl> 0, 248, 296, 183, 76, 64, 219, 431, 169, 363, 38, 108...
## $ `6/6/20` <dbl> 0, 259, 304, 190, 77, 70, 225, 442, 174, 373, 40, 108...
## $ `6/7/20` <dbl> 0, 265, 313, 193, 77, 72, 232, 449, 176, 378, 42, 110...
## $ `6/8/20` <dbl> 0, 272, 320, 197, 79, 73, 238, 455, 178, 383, 42, 111...
## $ `6/9/20` <dbl> 0, 282, 325, 199, 85, 75, 243, 464, 180, 391, 42, 117...
## $ `6/10/20` <dbl> 0, 295, 331, 208, 89, 79, 248, 471, 182, 401, 42, 118...
## $ `6/11/20` <dbl> 0, 312, 343, 214, 93, 87, 253, 484, 184, 417, 42, 121...
## $ `6/12/20` <dbl> 0, 323, 353, 221, 97, 95, 258, 499, 188, 427, 46, 122...
## $ `6/13/20` <dbl> 0, 331, 361, 226, 100, 102, 276, 517, 190, 438, 47, 1...
## $ `6/14/20` <dbl> 0, 357, 364, 234, 104, 110, 302, 536, 195, 453, 51, 1...
## $ `6/15/20` <dbl> 0, 368, 383, 238, 111, 116, 307, 544, 204, 475, 53, 1...
## $ `6/16/20` <dbl> 0, 373, 389, 245, 116, 121, 310, 551, 206, 485, 53, 1...
## $ `6/17/20` <dbl> 0, 375, 392, 251, 118, 123, 313, 554, 208, 486, 53, 1...
## $ `6/18/20` <dbl> 0, 400, 401, 263, 121, 130, 320, 566, 210, 501, 55, 1...
## $ `6/19/20` <dbl> 0, 411, 413, 266, 126, 139, 320, 569, 210, 507, 58, 1...
## $ `6/20/20` <dbl> 0, 431, 420, 272, 126, 143, 327, 572, 211, 516, 58, 1...
## $ `6/21/20` <dbl> 0, 434, 430, 272, 127, 149, 327, 576, 213, 521, 58, 1...
## $ `6/22/20` <dbl> 0, 442, 437, 277, 129, 153, 328, 578, 215, 528, 58, 1...
## $ `6/23/20` <dbl> 0, 453, 450, 280, 135, 159, 329, 581, 216, 534, 58, 1...
## $ `6/24/20` <dbl> 0, 469, 464, 288, 141, 168, 336, 584, 220, 543, 58, 1...
## $ `6/25/20` <dbl> 0, 479, 477, 305, 149, 176, 351, 588, 233, 549, 64, 1...
## $ `6/26/20` <dbl> 0, 488, 515, 312, 153, 184, 351, 594, 236, 559, 68, 1...
## $ `6/27/20` <dbl> 0, 498, 555, 317, 161, 188, 358, 600, 245, 561, 69, 2...
## $ `6/28/20` <dbl> 0, 503, 575, 317, 162, 189, 358, 602, 245, 561, 70, 2...
## $ `6/29/20` <dbl> 0, 527, 643, 322, 165, 199, 365, 605, 269, 585, 73, 2...
## $ `6/30/20` <dbl> 0, 537, 680, 325, 170, 208, 365, 607, 276, 590, 74, 2...
## $ `7/1/20` <dbl> 0, 553, 703, 326, 174, 218, 367, 607, 278, 595, 77, 2...
## $ `7/2/20` <dbl> 0, 561, 751, 335, 179, 222, 369, 610, 288, 611, 82, 2...
## $ `7/3/20` <dbl> 0, 568, 845, 348, 189, 230, 372, 625, 330, 625, 88, 2...
## $ `7/4/20` <dbl> 0, 591, 863, 350, 190, 234, 373, 626, 340, 637, 88, 2...
## $ `7/5/20` <dbl> 0, 615, 881, 352, 193, 239, 373, 634, 362, 642, 100, ...
## $ `7/6/20` <dbl> 0, 618, 911, 356, 197, 247, 373, 634, 384, 655, 105, ...
## $ `7/7/20` <dbl> 0, 644, 997, 360, 199, 255, 373, 634, 395, 656, 106, ...
## $ `7/8/20` <dbl> 0, 651, 1056, 366, 201, 262, 374, 639, 411, 660, 114,...
## $ `7/9/20` <dbl> 0, 661, 1131, 371, 211, 282, 375, 646, 445, 672, 115,...
## $ `7/10/20` <dbl> 0, 670, 1187, 381, 218, 292, 381, 648, 465, 679, 118,...
## $ `7/11/20` <dbl> 0, 684, 1224, 398, 224, 307, 382, 654, 500, 690, 128,...
## $ `7/12/20` <dbl> 0, 706, 1294, 403, 228, 331, 383, 655, 526, 693, 129,...
## $ `7/13/20` <dbl> 0, 728, 1359, 413, 231, 350, 383, 660, 566, 702, 136,...
## $ `7/14/20` <dbl> 0, 746, 1414, 428, 236, 366, 385, 661, 589, 712, 140,...
## $ `7/15/20` <dbl> 0, 756, 1518, 441, 242, 389, 386, 664, 655, 718, 145,...
## $ `7/16/20` <dbl> 0, 780, 1599, 459, 247, 424, 389, 669, 675, 731, 152,...
## $ `7/17/20` <dbl> 0, 789, 1689, 463, 255, 440, 393, 672, 720, 742, 157,...
## $ `7/18/20` <dbl> 0, 827, 1819, 483, 264, 458, 397, 678, 741, 756, 165,...
## $ `7/19/20` <dbl> 0, 842, 1937, 495, 269, 482, 398, 686, 785, 762, 173,...
## $ `7/20/20` <dbl> 0, 857, 2013, 503, 279, 507, 400, 689, 832, 767, 179,...
## $ `7/21/20` <dbl> 0, 865, 2102, 514, 283, 524, 401, 695, 869, 774, 182,...
## $ `7/22/20` <dbl> 0, 886, 2196, 518, 287, 547, 407, 701, 891, 782, 184,...
## $ `7/23/20` <dbl> 0, 905, 2461, 534, 289, 585, 408, 706, 934, 789, 193,...
## $ `7/24/20` <dbl> 0, 921, 2513, 539, 303, 615, 411, 711, 999, 797, 205,...
## $ `7/25/20` <dbl> 0, 932, 2662, 552, 318, 637, 414, 720, 1062, 810, 207...
## $ `7/26/20` <dbl> 0, 942, 2708, 562, 324, 646, 415, 724, 1113, 821, 209...
## $ `7/27/20` <dbl> 0, 965, 2770, 569, 334, 669, 416, 730, 1194, 825, 220...
## $ `7/28/20` <dbl> 0, 974, 2835, 575, 337, 675, 429, 734, 1243, 836, 221...
## $ `7/29/20` <dbl> 0, 974, 2835, 575, 338, 675, 429, 734, 1244, 836, 221...
## $ `7/30/20` <dbl> 0, 1002, 3028, 585, 352, 731, 435, 747, 1336, 848, 23...
## $ `7/31/20` <dbl> 0, 1015, 3101, 598, 363, 767, 437, 753, 1450, 859, 23...
## $ `8/1/20` <dbl> 0, 1030, 3142, 602, 368, 792, 443, 757, 1480, 861, 24...
## $ `8/2/20` <dbl> 0, 1052, 3223, 610, 372, 813, 445, 765, 1580, 868, 25...
## $ `8/3/20` <dbl> 0, 1066, 3265, 612, 382, 830, 446, 766, 1612, 875, 26...
## $ `8/4/20` <dbl> 0, 1073, 3320, 614, 389, 836, 449, 766, 1646, 882, 26...
## $ `8/5/20` <dbl> 0, 1073, 3380, 615, 392, 839, 452, 769, 1683, 886, 27...
## $ `8/6/20` <dbl> 0, 1096, 3438, 619, 421, 874, 458, 771, 1741, 893, 28...
## $ `8/7/20` <dbl> 0, 1113, 3504, 624, 424, 909, 462, 774, 1777, 899, 29...
## $ `8/8/20` <dbl> 0, 1134, 3564, 628, 434, 923, 471, 773, 1836, 904, 29...
## $ `8/9/20` <dbl> 0, 1215, 3606, 630, 446, 934, 472, 779, 1860, 906, 30...
## $ `8/10/20` <dbl> 0, 1215, 3714, 631, 450, 947, 474, 782, 1883, 909, 30...
## $ `8/11/20` <dbl> 0, 1215, 3736, 643, 455, 958, 489, 785, 1914, 916, 30...
## $ `8/12/20` <dbl> 0, 1241, 3776, 646, 464, 967, 500, 788, 1935, 918, 31...
## $ `8/13/20` <dbl> 0, 1250, 3813, 651, 469, 977, 501, 790, 1959, 919, 32...
## $ `8/14/20` <dbl> 0, 1252, 3860, 656, 477, 989, 502, 796, 1975, 922, 32...
## $ `8/15/20` <dbl> 0, 1262, 3909, 663, 483, 996, 503, 807, 2019, 925, 33...
## $ `8/16/20` <dbl> 0, 1273, 3948, 671, 483, 1005, 504, 811, 2037, 927, 3...
## $ `8/17/20` <dbl> 0, 1274, 3960, 672, 488, 1008, 504, 814, 2055, 928, 3...
## $ `8/18/20` <dbl> 0, 1291, 3977, 674, 490, 1034, 512, 814, 2107, 937, 3...
## $ `8/19/20` <dbl> 0, 1293, 4002, 683, 503, 1049, 530, 814, 2159, 941, 3...
## $ `8/20/20` <dbl> 0, 1293, 4035, 690, 507, 1077, 534, 814, 2214, 949, 3...
## $ `8/21/20` <dbl> 0, 1293, 4054, 690, 509, 1083, 534, 814, 2228, 952, 3...
## $ `8/22/20` <dbl> 0, 1322, 4115, 699, 516, 1096, 536, 822, 2276, 957, 3...
## $ `8/23/20` <dbl> 0, 1324, 4147, 702, 523, 1099, 536, 824, 2286, 958, 3...
## $ `8/24/20` <dbl> 0, 1351, 4167, 720, 526, 1135, 536, 825, 2327, 971, 3...
## $ `8/25/20` <dbl> 0, 1355, 4190, 724, 527, 1160, 536, 826, 2345, 973, 3...
## $ `8/26/20` <dbl> 0, 1366, 4265, 732, 530, 1195, 537, 833, 2400, 983, 3...
## $ `8/27/20` <dbl> 0, 1377, 4311, 739, 533, 1213, 538, 839, 2413, 1011, ...
## $ `8/28/20` <dbl> 0, 1389, 4347, 745, 535, 1219, 541, 840, 2443, 1017, ...
## $ `8/29/20` <dbl> 0, 1400, 4424, 753, 540, 1248, 546, 855, 2499, 1024, ...
## $ `8/30/20` <dbl> 0, 1438, 4525, 757, 550, 1277, 550, 864, 2533, 1027, ...
## $ `8/31/20` <dbl> 0, 1442, 4545, 757, 554, 1287, 551, 866, 2567, 1033, ...
## $ `9/1/20` <dbl> 0, 1453, 4568, 764, 558, 1303, 559, 871, 2619, 1041, ...
# Deaths file: county-level cumulative COVID-19 deaths, same wide layout as the
# cases file (one column per date). `deathFile` is the path defined earlier in
# this document — presumably the USAFacts deaths CSV; verify against the chunk
# where it is assigned.
rawDeaths_20200903 <- readr::read_csv(deathFile)
## Parsed with column specification:
## cols(
## .default = col_double(),
## `County Name` = col_character(),
## State = col_character()
## )
## See spec(...) for full column specifications.
# Inspect the raw deaths data; it mirrors the cases file exactly: 3,195 rows
# (counties) x 228 columns (countyFIPS, County Name, State, stateFIPS, then
# one cumulative-death column per date).
glimpse(rawDeaths_20200903)
## Observations: 3,195
## Variables: 228
## $ countyFIPS <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/26/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/27/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/28/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/29/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/30/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/31/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/1/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/2/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/3/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/4/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/5/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/6/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/7/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/8/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/9/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/10/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/11/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/12/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/13/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/14/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/15/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/16/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/17/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/18/20` <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/19/20` <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/20/20` <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/21/20` <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 13, 0, 0, 0, 1, 0, 0, 0, 1...
## $ `4/22/20` <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 16, 0, 0, 0, 1, 0, 1, 0, 1...
## $ `4/23/20` <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 16, 0, 1, 0, 1, 1, 1, 0, 1...
## $ `4/24/20` <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 17, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/25/20` <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/26/20` <dbl> 0, 2, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/27/20` <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/28/20` <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 19, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/29/20` <dbl> 0, 3, 2, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/30/20` <dbl> 0, 3, 3, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/1/20` <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/2/20` <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/3/20` <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/4/20` <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/5/20` <dbl> 0, 3, 5, 1, 0, 0, 0, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/6/20` <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/7/20` <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/8/20` <dbl> 0, 3, 5, 1, 1, 0, 1, 3, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/9/20` <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/10/20` <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/11/20` <dbl> 0, 3, 6, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/12/20` <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 21, 0, 1, 1, 1, 1, 1, 1, 2...
## $ `5/13/20` <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 22, 0, 1, 2, 1, 1, 1, 1, 2...
## $ `5/14/20` <dbl> 0, 3, 8, 1, 1, 0, 1, 8, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/15/20` <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/16/20` <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/17/20` <dbl> 0, 3, 8, 1, 1, 1, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/18/20` <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/19/20` <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/20/20` <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/21/20` <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/22/20` <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/23/20` <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/24/20` <dbl> 0, 3, 9, 1, 1, 1, 1, 12, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/25/20` <dbl> 0, 3, 9, 1, 1, 1, 3, 12, 3, 24, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/26/20` <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/27/20` <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/28/20` <dbl> 0, 3, 9, 1, 1, 1, 4, 15, 3, 24, 2, 1, 8, 2, 2, 1, 1, ...
## $ `5/29/20` <dbl> 0, 3, 9, 1, 1, 1, 4, 16, 3, 24, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/30/20` <dbl> 0, 4, 9, 1, 1, 1, 4, 17, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/31/20` <dbl> 0, 4, 9, 1, 1, 1, 5, 18, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `6/1/20` <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 25, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/2/20` <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/3/20` <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/4/20` <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/5/20` <dbl> 0, 5, 9, 1, 1, 1, 7, 21, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/6/20` <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/7/20` <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/8/20` <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/9/20` <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/10/20` <dbl> 0, 6, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/11/20` <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/12/20` <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/13/20` <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/14/20` <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/15/20` <dbl> 0, 6, 9, 1, 1, 1, 9, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/16/20` <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/17/20` <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/18/20` <dbl> 0, 8, 9, 1, 1, 1, 9, 25, 4, 26, 5, 3, 11, 4, 2, 1, 1,...
## $ `6/19/20` <dbl> 0, 8, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 11, 4, 2, 1, 1...
## $ `6/20/20` <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/21/20` <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/22/20` <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/23/20` <dbl> 0, 9, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 4, 2, 1, 1...
## $ `6/24/20` <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/25/20` <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/26/20` <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/27/20` <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/28/20` <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/29/20` <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/30/20` <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/1/20` <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/2/20` <dbl> 0, 13, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/3/20` <dbl> 0, 13, 10, 2, 1, 1, 10, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/4/20` <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/5/20` <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/6/20` <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/7/20` <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/8/20` <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/9/20` <dbl> 0, 14, 11, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/10/20` <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/11/20` <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/12/20` <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/13/20` <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/14/20` <dbl> 0, 18, 12, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/15/20` <dbl> 0, 19, 13, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/16/20` <dbl> 0, 20, 14, 3, 2, 1, 11, 32, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/17/20` <dbl> 0, 21, 14, 3, 2, 1, 11, 33, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/18/20` <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/19/20` <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/20/20` <dbl> 0, 21, 15, 4, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/21/20` <dbl> 0, 21, 16, 4, 2, 1, 11, 34, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/22/20` <dbl> 0, 21, 16, 4, 2, 1, 12, 34, 6, 34, 7, 5, 12, 8, 2, 1,...
## $ `7/23/20` <dbl> 0, 21, 17, 4, 2, 1, 12, 35, 6, 34, 7, 5, 12, 9, 2, 1,...
## $ `7/24/20` <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/25/20` <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/26/20` <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/27/20` <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/28/20` <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/29/20` <dbl> 0, 21, 21, 4, 2, 3, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/30/20` <dbl> 0, 21, 21, 5, 2, 3, 12, 36, 8, 38, 8, 6, 12, 9, 5, 1,...
## $ `7/31/20` <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/1/20` <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/2/20` <dbl> 0, 21, 23, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/3/20` <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/4/20` <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 12, 38, 8, 7, 12, 9, 5, 1...
## $ `8/5/20` <dbl> 0, 22, 24, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/6/20` <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/7/20` <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/8/20` <dbl> 0, 22, 26, 5, 5, 4, 12, 37, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/9/20` <dbl> 0, 22, 27, 5, 5, 4, 12, 37, 14, 38, 8, 8, 12, 9, 5, 1...
## $ `8/10/20` <dbl> 0, 22, 28, 5, 5, 4, 12, 37, 17, 38, 9, 9, 12, 10, 5, ...
## $ `8/11/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 19, 38, 9, 12, 12, 10, 5,...
## $ `8/12/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/13/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/14/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/15/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/16/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/17/20` <dbl> 0, 23, 32, 6, 6, 5, 14, 37, 23, 38, 9, 12, 12, 10, 5,...
## $ `8/18/20` <dbl> 0, 23, 33, 6, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/19/20` <dbl> 0, 23, 33, 7, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/20/20` <dbl> 0, 23, 34, 7, 6, 5, 14, 37, 25, 38, 10, 12, 12, 10, 5...
## $ `8/21/20` <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 5...
## $ `8/22/20` <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/23/20` <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/24/20` <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 27, 38, 10, 13, 12, 11, 6...
## $ `8/25/20` <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 28, 39, 10, 13, 12, 11, 6...
## $ `8/26/20` <dbl> 0, 23, 36, 7, 6, 7, 14, 37, 28, 39, 10, 13, 12, 12, 6...
## $ `8/27/20` <dbl> 0, 23, 37, 7, 6, 7, 14, 37, 30, 39, 12, 13, 12, 13, 6...
## $ `8/28/20` <dbl> 0, 23, 39, 7, 6, 9, 14, 37, 32, 40, 12, 13, 12, 13, 6...
## $ `8/29/20` <dbl> 0, 23, 40, 7, 7, 9, 14, 37, 35, 40, 12, 13, 12, 14, 6...
## $ `8/30/20` <dbl> 0, 23, 40, 7, 7, 10, 14, 37, 35, 40, 12, 13, 12, 14, ...
## $ `8/31/20` <dbl> 0, 23, 42, 7, 8, 11, 14, 37, 36, 40, 12, 13, 12, 14, ...
## $ `9/1/20` <dbl> 0, 24, 42, 7, 8, 11, 14, 37, 38, 40, 12, 13, 12, 14, ...
# Population file
rawPop_usafacts <- readr::read_csv(popFile)
## Parsed with column specification:
## cols(
## countyFIPS = col_double(),
## `County Name` = col_character(),
## State = col_character(),
## population = col_double()
## )
glimpse(rawPop_usafacts)
## Observations: 3,195
## Variables: 4
## $ countyFIPS <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ population <dbl> 0, 55869, 223234, 24686, 22394, 57826, 10101, 19448, ...
Data appear to be stored as cumulative rather than daily increases. The raw data files for cases and deaths are pivoted longer such that ‘date’ becomes a field of appropriate type:
# Reshape the raw cases file: one row per county-date with a proper Date column
pvtCases_20200903 <- rawCases_20200903 %>%
  rename(countyName = `County Name`, state = State) %>%
  pivot_longer(cols = -c(countyFIPS, countyName, state, stateFIPS),
               names_to = "date",
               values_to = "cumCases"
  ) %>%
  mutate(date = lubridate::mdy(date))
glimpse(pvtCases_20200903)
## Observations: 715,680
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumCases <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
# Reshape the raw deaths file: one row per county-date with a proper Date column
pvtDeaths_20200903 <- rawDeaths_20200903 %>%
  rename(countyName = `County Name`, state = State) %>%
  pivot_longer(cols = -c(countyFIPS, countyName, state, stateFIPS),
               names_to = "date",
               values_to = "cumDeaths"
  ) %>%
  mutate(date = lubridate::mdy(date))
glimpse(pvtDeaths_20200903)
## Observations: 715,680
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumDeaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
# Standardize column names in the population file to match the case/death frames
pop_usafacts <- rename(rawPop_usafacts, countyName = `County Name`, state = State)
glimpse(pop_usafacts)
## Observations: 3,195
## Variables: 4
## $ countyFIPS <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 1017,...
## $ countyName <chr> "Statewide Unallocated", "Autauga County", "Baldwin Coun...
## $ state <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ population <dbl> 0, 55869, 223234, 24686, 22394, 57826, 10101, 19448, 113...
Control totals by state can be generated and compared with the associated COVID Tracking Project data:
# Cases by state from USA Facts
# Snapshot as of 2020-08-29; summing cumCases over counties (including the
# countyFIPS==0 "unassigned" rows) gives the state total
casebyState_usf <- pvtCases_20200903 %>%
filter(date==as.Date("2020-08-29")) %>%
group_by(state, date) %>%
summarize(cumCases_usaf=sum(cumCases)) %>%
ungroup()
# Deaths by state from USA Facts (same snapshot date as cases)
deathbyState_usf <- pvtDeaths_20200903 %>%
filter(date==as.Date("2020-08-29")) %>%
group_by(state, date) %>%
summarize(cumDeaths_usaf=sum(cumDeaths)) %>%
ungroup()
# Combined data for USA Facts; inner join is safe since both frames cover the
# same state-date snapshot
byState_usf <- casebyState_usf %>%
inner_join(deathbyState_usf, by=c("state", "date"))
glimpse(byState_usf)
## Observations: 51
## Variables: 4
## $ state <chr> "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE"...
## $ date <date> 2020-08-29, 2020-08-29, 2020-08-29, 2020-08-29, 202...
## $ cumCases_usaf <dbl> 5182, 123889, 60378, 201285, 701515, 57041, 52495, 1...
## $ cumDeaths_usaf <dbl> 37, 2152, 770, 5007, 12904, 1943, 4463, 605, 604, 11...
# Cases and deaths by state from COVID Tracking Project
# NOTE(review): this sums `cases`/`deaths` over all dates through the cutoff,
# which yields cumulative totals only if dfPerCapita_20200830_002 holds daily
# incremental counts — TODO confirm against that frame's construction
byState_ctp <- dfPerCapita_20200830_002 %>%
filter(date <= as.Date("2020-08-29")) %>%
group_by(state) %>%
summarize(cumCases_ctp=sum(cases), cumDeaths_ctp=sum(deaths))
glimpse(byState_ctp)
## Observations: 51
## Variables: 3
## $ state <chr> "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE",...
## $ cumCases_ctp <dbl> 6035, 123889, 60378, 201285, 693786, 56773, 52495, 13...
## $ cumDeaths_ctp <dbl> 37, 2152, 772, 5007, 12834, 1843, 4465, 605, 604, 112...
# Fully merged data: keep any state present in either source
byState_both <- full_join(byState_ctp, byState_usf, by = "state")
glimpse(byState_both)
## Observations: 51
## Variables: 6
## $ state <chr> "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE"...
## $ cumCases_ctp <dbl> 6035, 123889, 60378, 201285, 693786, 56773, 52495, 1...
## $ cumDeaths_ctp <dbl> 37, 2152, 772, 5007, 12834, 1843, 4465, 605, 604, 11...
## $ date <date> 2020-08-29, 2020-08-29, 2020-08-29, 2020-08-29, 202...
## $ cumCases_usaf <dbl> 5182, 123889, 60378, 201285, 701515, 57041, 52495, 1...
## $ cumDeaths_usaf <dbl> 37, 2152, 770, 5007, 12904, 1943, 4463, 605, 604, 11...
Comparisons by state are made first for cases:
# Plot of cases by source
# One point per state per source, log scale, states ordered by their smaller
# reported value so the two sources can be compared visually
byState_both %>%
select(state, `COVID Tracking Project`=cumCases_ctp, `USA Facts`=cumCases_usaf) %>%
pivot_longer(-state) %>%
ggplot(aes(x=fct_reorder(state, value, .fun=min), y=value)) +
geom_point(aes(color=name)) +
coord_flip() +
scale_y_log10() +
labs(x="", y="Cases as of August 29", title="Cases through August 29 by source") +
scale_color_discrete("Source")
# Plot of percent difference by state
# pctDelta is the symmetric percent difference: delta divided by the mean of
# the two sources (2*delta/(a+b))
byState_both %>%
mutate(deltaCases=cumCases_usaf-cumCases_ctp, pctDelta=2*deltaCases/(cumCases_usaf+cumCases_ctp)) %>%
select(state, `Absolute Delta`=deltaCases, `Percent Delta`=pctDelta) %>%
pivot_longer(-state) %>%
ggplot(aes(x=fct_reorder(state, value), y=value)) +
geom_point() +
coord_flip() +
labs(x="",
y="Difference as of August 29",
title="Difference in cases by state and source through August 29",
subtitle="Delta is USA Facts minus COVID Tracking Project (positive means USA Facts greater)"
) +
facet_wrap(~name, scales="free_x")
At a glance, there are minor differences by state, most accounting for a handful of percent or less. Nothing appears likely to impact the analysis.
The same analysis is run for deaths:
# Plot of deaths by source (same construction as the cases-by-source plot)
byState_both %>%
select(state, `COVID Tracking Project`=cumDeaths_ctp, `USA Facts`=cumDeaths_usaf) %>%
pivot_longer(-state) %>%
ggplot(aes(x=fct_reorder(state, value, .fun=min), y=value)) +
geom_point(aes(color=name)) +
coord_flip() +
scale_y_log10() +
labs(x="", y="Deaths as of August 29", title="Deaths through August 29 by source") +
scale_color_discrete("Source")
# Plot of percent difference by state
# Same symmetric percent difference construction as for cases
byState_both %>%
mutate(deltaDeaths=cumDeaths_usaf-cumDeaths_ctp, pctDelta=2*deltaDeaths/(cumDeaths_usaf+cumDeaths_ctp)) %>%
select(state, `Absolute Delta`=deltaDeaths, `Percent Delta`=pctDelta) %>%
pivot_longer(-state) %>%
ggplot(aes(x=fct_reorder(state, value), y=value)) +
geom_point() +
coord_flip() +
labs(x="",
y="Difference as of August 29",
title="Difference in deaths by state and source through August 29",
subtitle="Delta is USA Facts minus COVID Tracking Project (positive means USA Facts greater)"
) +
facet_wrap(~name, scales="free_x")
The difference in deaths is driven largely by New York, where USA Facts reports ~7000 more deaths (~25% of the baseline deaths) than COVID Tracking Project. Most other states are fairly well aligned between the data sources, and the caution is noted that conclusions focused on New York (standalone or segment) may be heavily influenced by selection of data source.
Evolution of deaths and cases over time is also assessed:
# National daily totals from USA Facts: deaths and cases joined on date
# (the implicit join key is date; a "Joining, by = ..." message is emitted)
usfByDate <- pvtDeaths_20200903 %>%
group_by(date) %>%
summarize(deaths_usf=sum(cumDeaths)) %>%
ungroup() %>%
full_join(pvtCases_20200903 %>%
group_by(date) %>%
summarize(cases_usf=sum(cumCases)) %>%
ungroup()
)
## Joining, by = "date"
# National daily totals combining CTP and USA Facts, long format
# CTP daily values are cumsum'ed within state (rows sorted by date first) to
# match the cumulative convention of the USA Facts frame; separate() splits
# names like "cases_ctp" into type ("cases") and source ("ctp")
bothByDate <- dfPerCapita_20200830 %>%
group_by(state) %>%
arrange(date) %>%
mutate(cumCases=cumsum(cases), cumDeaths=cumsum(deaths)) %>%
group_by(date) %>%
summarize(cases_ctp=sum(cumCases), deaths_ctp=sum(cumDeaths)) %>%
full_join(usfByDate) %>%
pivot_longer(-date) %>%
tidyr::separate(name, into=c("type", "source"), sep="_")
## Joining, by = "date"
# Cumulative national burden over time, one line per source, faceted by type
bothByDate %>%
ggplot(aes(x=date, y=value/1000)) +
geom_line(aes(group=source, color=source)) +
facet_wrap(~type, scales="free_y") +
labs(x="Month (2020)",
y="Cumulative USA National Burden (000s)",
title="Evolution of reported coronavirus burden in USA by source"
) +
scale_x_date(date_breaks="1 months", date_labels="%m")
## Warning: Removed 3 rows containing missing values (geom_path).
# Daily delta between sources; the ifelse/sum trick pulls out each source's
# value within the date-type group so the difference can be taken in one pass
bothByDate %>%
group_by(date, type) %>%
summarize(delta=sum(ifelse(source=="usf", value, 0))-sum(ifelse(source=="ctp", value, 0))) %>%
ungroup() %>%
ggplot(aes(x=date, y=delta)) +
geom_col(fill="lightblue") +
facet_wrap(~type, scales="free_y") +
labs(x="Month (2020)",
y="Delta as USA Facts minus COVID Tracking Project",
title="Evolution of delta in reported cumulative coronavirus burden in USA by source",
subtitle="Positive numbers mean higher reported total in USA Facts than in COVID Tracking Project"
) +
scale_x_date(date_breaks="1 months", date_labels="%m")
## Warning: Removed 6 rows containing missing values (position_stack).
A second investigation is taken with the New York data removed:
# Same national daily totals from USA Facts, but with New York excluded
# (mirrors the construction of usfByDate above)
usfByDate_nony <- pvtDeaths_20200903 %>%
filter(state != "NY") %>%
group_by(date) %>%
summarize(deaths_usf=sum(cumDeaths)) %>%
ungroup() %>%
full_join(pvtCases_20200903 %>%
filter(state != "NY") %>%
group_by(date) %>%
summarize(cases_usf=sum(cumCases)) %>%
ungroup()
)
## Joining, by = "date"
# Combined CTP + USA Facts national daily totals with New York excluded
# (mirrors the construction of bothByDate above)
bothByDate_nony <- dfPerCapita_20200830 %>%
filter(state != "NY") %>%
group_by(state) %>%
arrange(date) %>%
mutate(cumCases=cumsum(cases), cumDeaths=cumsum(deaths)) %>%
group_by(date) %>%
summarize(cases_ctp=sum(cumCases), deaths_ctp=sum(cumDeaths)) %>%
full_join(usfByDate_nony) %>%
pivot_longer(-date) %>%
tidyr::separate(name, into=c("type", "source"), sep="_")
## Joining, by = "date"
# Cumulative national burden over time excluding NY, one line per source
bothByDate_nony %>%
ggplot(aes(x=date, y=value/1000)) +
geom_line(aes(group=source, color=source)) +
facet_wrap(~type, scales="free_y") +
labs(x="Month (2020)",
y="Cumulative USA National Burden (000s)",
title="Evolution of reported coronavirus burden in USA by source - excludes New York State"
) +
scale_x_date(date_breaks="1 months", date_labels="%m")
## Warning: Removed 3 rows containing missing values (geom_path).
# Daily delta between sources excluding NY (same ifelse/sum extraction trick)
bothByDate_nony %>%
group_by(date, type) %>%
summarize(delta=sum(ifelse(source=="usf", value, 0))-sum(ifelse(source=="ctp", value, 0))) %>%
ungroup() %>%
ggplot(aes(x=date, y=delta)) +
geom_col(fill="lightblue") +
facet_wrap(~type, scales="free_y") +
labs(x="Month (2020)",
y="Delta as USA Facts minus COVID Tracking Project",
title="Delta in reported cumulative coronavirus burden in USA by source - excludes New York State",
subtitle="Positive numbers mean higher reported total in USA Facts than in COVID Tracking Project"
) +
scale_x_date(date_breaks="1 months", date_labels="%m")
## Warning: Removed 6 rows containing missing values (position_stack).
The plots provide further evidence that the sources are well aligned with the exception of New York state. There is some directional evidence that COVID Tracking Project reports deaths earlier than USA Facts, though a cumulative difference of 500 deaths will have modest if any impact on the analysis.
The population data is also compared to usmap::statepop, the data source used for the existing analyses:
# Compare state population totals: USA Facts vs. usmap::statepop
# Log-log scatter with state abbreviations as labels; the dashed y=x line
# marks perfect agreement between the two sources
pop_usafacts %>%
group_by(state) %>%
summarize(usf=sum(population)) %>%
ungroup() %>%
full_join(select(usmap::statepop, state=abbr, usmap=pop_2015)) %>%
ggplot(aes(x=usmap, y=usf)) +
geom_text(aes(label=state)) +
labs(x="Population in usmap::statepop", y="Population in USA Facts", title="Comparison of Population") +
geom_abline(lty=2) +
scale_x_log10() +
scale_y_log10()
## Joining, by = "state"
# Percent change in state population, USA Facts relative to usmap::statepop
# (usf/usmap - 1 is positive when USA Facts reports the larger population)
pop_usafacts %>%
group_by(state) %>%
summarize(usf=sum(population)) %>%
ungroup() %>%
full_join(select(usmap::statepop, state=abbr, usmap=pop_2015)) %>%
ggplot(aes(x=fct_reorder(state, usf/usmap-1), y=usf/usmap-1)) +
geom_col(fill="lightblue") +
coord_flip() +
labs(x="",
y="% Change in Population (USA Facts vs. usmap::statepop)",
title="Comparison of Population",
subtitle="Positive means larger in USA Facts than in usmap::statepop"
) +
scale_y_continuous(labels=scales::percent)
## Joining, by = "state"
USA Facts appear to be using more recent estimates of US population which include a generally larger population and with different states gaining or losing population at different rates.
Next steps are to explore the coronavirus burden by county using the USA Facts data.
A handful of states assign meaningful amounts of disease burden to “unassigned”:
# State-date totals split by whether the burden is county-assigned:
# countyFIPS==0 is the "Statewide Unallocated" bucket, flagged here as
# unassigned=1; cases and deaths are joined on state-date-unassigned
unassignedState <- pvtCases_20200903 %>%
mutate(unassigned=ifelse(countyFIPS==0, 1, 0)) %>%
group_by(state, date, unassigned) %>%
summarize(cases=sum(cumCases)) %>%
ungroup() %>%
full_join(pvtDeaths_20200903 %>%
mutate(unassigned=ifelse(countyFIPS==0, 1, 0)) %>%
group_by(state, date, unassigned) %>%
summarize(deaths=sum(cumDeaths)) %>%
ungroup()
)
## Joining, by = c("state", "date", "unassigned")
# Plot the unassigned share by state as of the snapshot date
# unass = unassigned fraction (sum(value*unassigned) keeps only the
# unassigned rows' contribution); states are ordered by their cases share.
# Labels are drawn only for states above small thresholds, nudged left of
# the point (negative y offset with hjust=1 after coord_flip)
unassignedState %>%
filter(date=="2020-08-29") %>%
pivot_longer(c("cases", "deaths")) %>%
group_by(state, name) %>%
summarize(total=sum(value), unass=sum(value*unassigned)/sum(value)) %>%
ggplot(aes(x=fct_reorder(state, unass*ifelse(name=="cases", 1, 0)), y=unass)) +
geom_point() +
geom_text(data=~filter(., (name=="cases" & unass >= 0.02) | (name=="deaths" & unass > 0.005)),
aes(y=unass + ifelse(name=="cases", -0.0025, -0.001),
label=paste0(round(100*unass, 1), "% (", state ,")")
),
size=3,
hjust=1
) +
coord_flip() +
labs(x="", y="% Unassigned as of August 29, 2020", title="Unassigned percentage by state") +
facet_wrap(~name, scales="free_x")
These amounts appear to be modest overall and unlikely to meaningfully drive macro-level findings. Perhaps an algorithm can be considered for assigning cases to counties in proportion to total burden and/or population by county in the assigned portion of the state.
Four states hit hard on a per capita basis are in the northern Acela corridor - NJ, NY, CT, MA. A graph is created for the per capita death rate and disease rate in these geographies, with RI, VT, and NH added since they are contiguous. Data in the unassigned bucket are not included:
# Function to plot per-capita disease burden by county on a US map
#
# FUNCTION ARGUMENTS
# dfDisease: long-format disease data (rows keyed by countyFIPS-state-date)
# burdenVar: name (string) of the cumulative disease burden variable on date
# useDate: date(s) for the analysis (rows with date %in% useDate are kept)
# plotTitle: title for the plot's fill legend
# inclStates: states to include (NULL means include all)
# dfCounty: data for county-level population (default drops the FIPS==0 rows)
# popVar: name (string) of the population variable in the dfCounty file
# highColor: the color to be used for high disease burden
# maxPerCap: the maximum amount to be used for per capita (everything at or above shaded the same)
# printPlot: boolean, whether to print the plot
# returnData: boolean, whether to return the underlying data (if FALSE, the plot object is returned)
#
# RETURNS: the per-capita data frame when returnData is TRUE, else the plot object
plotCountyPerCapita <- function(dfDisease,
                                burdenVar,
                                useDate,
                                plotTitle="",
                                inclStates=NULL,
                                dfCounty=filter(pop_usafacts, countyFIPS!=0),
                                popVar="population",
                                highColor="darkblue",
                                maxPerCap=NULL,
                                printPlot=TRUE,
                                returnData=!printPlot
                                ) {
  # Join population onto the disease data and compute burden per million;
  # the unassigned bucket (countyFIPS==0) and zero-population rows are dropped.
  # .data[[ ]] is used instead of get() so lookup is restricted to columns
  dfData <- dfDisease %>%
    left_join(dfCounty, by=c("countyFIPS", "state")) %>%
    filter(date %in% useDate, countyFIPS!=0, population>0) %>%
    mutate(burden=1000000*.data[[burdenVar]]/.data[[popVar]])
  # Modify inclStates to be every state (if needed due to NULL)
  if (is.null(inclStates)) inclStates <- dfData %>% pull(state) %>% unique() %>% sort()
  # Build the plot only when it will be printed or returned; use the scalar
  # short-circuit || (not vectorized |) for this length-1 condition
  if (printPlot || !returnData) {
    p1 <- dfData %>%
      mutate(burden=if(is.null(maxPerCap)) burden else pmin(burden, maxPerCap)) %>%
      select(fips=countyFIPS, burden) %>%
      usmap::plot_usmap(regions="counties", data=., values="burden", include=inclStates)
    # Apply a capped or uncapped fill scale depending on maxPerCap
    if (is.null(maxPerCap))
      p1 <- p1 + scale_fill_continuous(plotTitle, low="white", high=highColor)
    else {
      p1 <- p1 +
        scale_fill_continuous(plotTitle, low="white", high=highColor, limits=c(0, maxPerCap)) +
        labs(caption=paste0("Values at/above ", maxPerCap, " plotted as ", maxPerCap))
    }
  }
  # Print the plot if requested
  if (printPlot) print(p1)
  # Return the data if requested, otherwise return the plot object
  if (returnData) dfData %>% filter(state %in% inclStates) else p1
}
# Build (but do not print) the per-capita deaths map for the northeast states
deathPlot <- plotCountyPerCapita(pvtDeaths_20200903,
burdenVar="cumDeaths",
useDate=as.Date(c("2020-08-29")),
plotTitle="Deaths\nper million",
inclStates=c("NJ", "NY", "CT", "MA", "RI", "VT", "NH"),
highColor="darkred",
printPlot=FALSE,
returnData=FALSE
)
# Build the matching per-capita cases map for the same states and date
casesPlot <- plotCountyPerCapita(pvtCases_20200903,
burdenVar="cumCases",
useDate=as.Date(c("2020-08-29")),
plotTitle="Cases\nper million",
inclStates=c("NJ", "NY", "CT", "MA", "RI", "VT", "NH"),
highColor="darkblue",
printPlot=FALSE,
returnData=FALSE
)
# Draw the two maps side by side under a shared bold title
gridExtra::grid.arrange(casesPlot,
deathPlot,
nrow=1,
top=grid::textGrob("Per capita coronavirus burden as of August 29, 2020",
gp=grid::gpar(fontface=2, fontsize=12),
hjust=0,
x=0.05
)
)
There is significant variation in disease burden by county, with many of the more rural counties having a modest coronavirus burden per capita relative to the more densely populated areas.
The data are then pulled using the function without plotting:
# All possible dates
allDates <- pvtDeaths_20200903 %>% count(date) %>% pull(date)
# All northeast deaths data for all dates, per capita (data only, no plot)
perCapDeathNE <- plotCountyPerCapita(pvtDeaths_20200903,
burdenVar="cumDeaths",
useDate=allDates,
plotTitle="Deaths\nper million",
inclStates=c("NJ", "NY", "CT", "MA", "RI", "VT", "NH"),
highColor="darkred",
printPlot=FALSE,
returnData=TRUE
)
# Top 10 counties hit (by per-capita burden on 2020-09-01)
top10FIPS <- perCapDeathNE %>%
filter(date=="2020-09-01") %>%
arrange(-burden) %>%
head(10) %>%
pull(countyFIPS)
# Evolution of per capita deaths by date in the northeast
# countyName.x carries the join suffix from the left_join inside
# plotCountyPerCapita; bold=1 lines/labels highlight the top-10 counties
perCapDeathNE %>%
mutate(county=paste0(str_replace(countyName.x, " County", ""), " (", state, ")"),
bold=ifelse(countyFIPS %in% top10FIPS, 1, 0)
) %>%
arrange(date) %>%
ggplot(aes(x=date, y=burden)) +
geom_line(aes(group=countyFIPS, color=state, alpha=0.25 + 0.75*bold, size=0.5+0.5*bold)) +
scale_alpha_identity() +
scale_size_identity() +
geom_text(data=~filter(., bold==1, date==max(date)),
aes(x=date+lubridate::days(2), label=paste0(county, ": ", round(burden)), color=state),
size=3, fontface="bold", hjust=0
) +
scale_x_date(date_breaks="1 months", date_labels="%m", expand=expand_scale(0.2))
The hardest hit counties are in the NYC metro area and have deaths per capita in the 0.2%-0.35% range. The disease is estimated to have a fatality rate of 0.25%-0.50% (though with a lot of uncertainty), so there is some evidence that a large percentage of these areas may have contracted coronavirus if the reported death data and fatality rates are accurate.
The functions are further integrated so they can be repeated for different geographies:
# Function to make the side-by-side cases and deaths by county maps
#
# FUNCTION ARGUMENTS:
# useDate: the date to be plotted (Date, or anything as.Date() accepts)
# inclStates: the states to be included (NULL means all)
# caseData: the frame containing the cases data
# deathData: the frame containing the death data
# highCaseColor: color for the highest level of cases in a county
# highDeathColor: color for the highest level of deaths in a county
# highCaseAmount: cases at/above this level will be the same color (NULL means leave as-is)
# highDeathAmount: deaths at/above this level will be the same color (NULL means leave as-is)
# mainTitle: main title for the grid.arrange (NULL means useDate will be used)
#
# RETURNS: the gridExtra::grid.arrange() result (combined plot, drawn as a side effect)
casesDeathsByCounty <- function(useDate,
                                inclStates=NULL,
                                caseData=pvtCases_20200903,
                                deathData=pvtDeaths_20200903,
                                highCaseColor="darkblue",
                                highDeathColor="darkred",
                                highCaseAmount=NULL,
                                highDeathAmount=NULL,
                                mainTitle=NULL
                                ) {
  # Convert useDate to Date if not already in that format; inherits() is the
  # idiomatic class test and avoids precedence pitfalls with ! and %in%
  if (!inherits(useDate, "Date")) useDate <- as.Date(useDate)
  # Convert inclStates to be all states if not specified
  if (is.null(inclStates)) inclStates <- caseData %>% pull(state) %>% unique() %>% sort()
  # Create mainTitle if not passed
  if (is.null(mainTitle))
    mainTitle <- paste0("Per capita coronavirus burden as of ", format(useDate, "%B %d, %Y"))
  # Create the plot for deaths (object only; printed by grid.arrange below)
  deathPlot <- plotCountyPerCapita(deathData,
                                   burdenVar="cumDeaths",
                                   useDate=useDate,
                                   plotTitle="Deaths\nper million",
                                   inclStates=inclStates,
                                   highColor=highDeathColor,
                                   maxPerCap=highDeathAmount,
                                   printPlot=FALSE,
                                   returnData=FALSE
                                   )
  # Create the plot for cases
  casesPlot <- plotCountyPerCapita(caseData,
                                   burdenVar="cumCases",
                                   useDate=useDate,
                                   plotTitle="Cases\nper million",
                                   inclStates=inclStates,
                                   highColor=highCaseColor,
                                   maxPerCap=highCaseAmount,
                                   printPlot=FALSE,
                                   returnData=FALSE
                                   )
  # Print the combined plot object (cases left, deaths right, shared title)
  gridExtra::grid.arrange(casesPlot,
                          deathPlot,
                          nrow=1,
                          top=grid::textGrob(mainTitle,
                                             gp=grid::gpar(fontface=2, fontsize=12),
                                             hjust=0,
                                             x=0.05
                                             )
                          )
}
# Southeast: FL/GA/SC/AL/MS, with shared fill caps so regions are comparable
casesDeathsByCounty(useDate="2020-08-29",
inclStates=c("FL", "GA", "SC", "AL", "MS"),
highCaseAmount=80000,
highDeathAmount=2000
)
# Gulf: LA/TX
casesDeathsByCounty(useDate="2020-08-29",
inclStates=c("LA", "TX"),
highCaseAmount=80000,
highDeathAmount=2000
)
# Southwest: NV/NM/AZ/CO/UT
casesDeathsByCounty(useDate="2020-08-29",
inclStates=c("NV", "NM", "AZ", "CO", "UT"),
highCaseAmount=80000,
highDeathAmount=2000
)
The high impact areas show meaningful differences in burden by county, which will be explored further as a next step.
A function is written to plot the evolution of county-level statistics:
# Function to plot evolution of county-level burdens over time
#
# FUNCTION ARGUMENTS:
# dfBurden: file containing the relevant per-capita burden data
# burdenVar: the name of the variable containing the burden per-capita data
# inclStates: states to be included (default NULL means include all)
# topN: integer, number of counties to flag as "top"
# topNDate: the date to use as the topN cutpoint (NULL means most recent)
# printPlot: boolean, whether to print the plot
# returnData: boolean, if TRUE return the per-capita data file, otherwise return the plot object
# plotTitle: title for the plot (NULL means assume from burdenVar)
# countyPopFloor: floor for county population for the county to be plotted
# subT: subtitle for the chart (NULL means none)
#
# RETURNS: the per-capita data frame when returnData is TRUE, else the plot object
countyLevelEvolution <- function(dfBurden,
                                 burdenVar,
                                 inclStates=NULL,
                                 topN=10,
                                 topNDate=NULL,
                                 printPlot=TRUE,
                                 returnData=TRUE,
                                 plotTitle=NULL,
                                 countyPopFloor=0,
                                 subT=NULL
                                 ) {
  # Adjust inclStates if NULL
  if (is.null(inclStates)) inclStates <- dfBurden %>% count(state) %>% pull(state)
  # Create plotTitle if needed, inferring deaths vs. cases from the variable name
  if (is.null(plotTitle)) {
    plotTitle <- if(stringr::str_detect(string=stringr::str_to_lower(burdenVar), pattern="deaths")) {
      "Cumulative per-capita deaths by county"
    } else {
      "Cumulative per-capita cases by county"
    }
  }
  # Get all possible dates
  allDates <- dfBurden %>% count(date) %>% pull(date)
  # Get the data for the counties in the relevant states (return data only and do not plot)
  perCapData <- plotCountyPerCapita(dfBurden,
                                    burdenVar=burdenVar,
                                    useDate=allDates,
                                    inclStates=inclStates,
                                    printPlot=FALSE,
                                    returnData=TRUE
                                    )
  # Get the relevant top-N date and convert to Date if not already of that type
  if (is.null(topNDate)) topNDate <- perCapData %>% pull(date) %>% max()
  if (!inherits(topNDate, "Date")) topNDate <- as.Date(topNDate)
  # Top-N county FIPS codes by burden, counting only counties at/above the
  # population floor; stored as topFIPS so the topN argument is not shadowed
  topFIPS <- perCapData %>%
    filter(date==topNDate, population>=countyPopFloor) %>%
    arrange(-burden) %>%
    head(topN) %>%
    pull(countyFIPS)
  # Update perCapData with easy-read county name and bolding instructions
  # (countyName.x is the join-suffixed name from plotCountyPerCapita)
  perCapData <- perCapData %>%
    mutate(county=paste0(str_replace(countyName.x, " County", ""), " (", state, ")"),
           bold=ifelse(countyFIPS %in% topFIPS, 1, 0)
           ) %>%
    arrange(date)
  # Create the plot of all counties with the top-N counties flagged in bold
  p1 <- perCapData %>%
    filter(population>=countyPopFloor) %>%
    ggplot(aes(x=date, y=burden)) +
    geom_line(aes(group=countyFIPS, color=state, alpha=0.25 + 0.75*bold, size=0.5+0.5*bold)) +
    scale_alpha_identity() +
    scale_size_identity() +
    geom_text(data=~filter(., bold==1, date==max(date)),
              aes(x=date+lubridate::days(2), label=paste0(county, ": ", round(burden)), color=state),
              size=3,
              fontface="bold",
              hjust=0
              ) +
    scale_x_date(date_breaks="1 months", date_labels="%m", expand=expand_scale(mult=c(0, 0.4))) +
    labs(x="", y="Burden per million people", title=plotTitle) +
    theme(legend.position="bottom")
  # Add the subtitle if passed
  if (!is.null(subT)) p1 <- p1 + labs(subtitle=subT)
  # Print the plot if requested
  if (printPlot) print(p1)
  # Return the relevant object
  if (returnData) perCapData else p1
}
# Deaths and cases in SE counties (plot objects only; drawn side by side below)
deathSE <- countyLevelEvolution(pvtDeaths_20200903,
burdenVar="cumDeaths",
inclStates=c("FL", "GA", "SC", "AL", "MS"),
topN=5,
printPlot=FALSE,
returnData=FALSE
)
casesSE <- countyLevelEvolution(pvtCases_20200903,
burdenVar="cumCases",
inclStates=c("FL", "GA", "SC", "AL", "MS"),
topN=5,
printPlot=FALSE,
returnData=FALSE
)
gridExtra::grid.arrange(casesSE, deathSE, nrow=1)
# Deaths and cases in LA-TX-AZ-NV
deathSC <- countyLevelEvolution(pvtDeaths_20200903,
burdenVar="cumDeaths",
inclStates=c("LA", "TX", "AZ", "NV"),
topN=5,
printPlot=FALSE,
returnData=FALSE
)
casesSC <- countyLevelEvolution(pvtCases_20200903,
burdenVar="cumCases",
inclStates=c("LA", "TX", "AZ", "NV"),
topN=5,
printPlot=FALSE,
returnData=FALSE
)
gridExtra::grid.arrange(casesSC, deathSC, nrow=1)
The hardest hit counties tend to be smaller in population (e.g., East Carroll Parish is 7000 people and Madison Parish is 11000 people). Suppose instead that the analysis is limited to areas that clear a specified population threshold:
# Share of each state's population living in counties of 25k+ people
# pop25k is reused: first as a 0/1 county flag, then (after summarize) as the
# population total within flagged counties
pop_usafacts %>%
filter(countyFIPS != 0) %>%
mutate(pop25k=ifelse(population>=25000, 1, 0)) %>%
group_by(state) %>%
summarize(nCounty=n(),
n25k=sum(pop25k),
pop=sum(population),
pop25k=sum(population*pop25k)
) %>%
mutate(pctPopin25k=pop25k/pop) %>%
arrange(-pctPopin25k) %>%
filter(state %in% c("SC", "GA", "FL", "AL", "MS", "LA", "TX", "AZ", "NM"))
## # A tibble: 9 x 6
## state nCounty n25k pop pop25k pctPopin25k
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 AZ 15 13 7278717 7248111 0.996
## 2 FL 67 53 21477737 21268283 0.990
## 3 SC 46 36 5148714 4977681 0.967
## 4 TX 254 102 28995881 27502195 0.948
## 5 NM 33 17 2096829 1929618 0.920
## 6 AL 67 40 4903185 4473904 0.912
## 7 LA 64 37 4648794 4222140 0.908
## 8 GA 159 74 10617423 9498452 0.895
## 9 MS 82 37 2976149 2336419 0.785
With the exception of Mississippi (~79%), 90% or more of the population lives in counties with at least 25000 people. The countyLevelEvolution() function is re-run using its county-level population exclusion (countyPopFloor):
# Deaths and cases in SE counties, excluding counties under 25k population
deathSE <- countyLevelEvolution(pvtDeaths_20200903,
burdenVar="cumDeaths",
inclStates=c("FL", "GA", "SC", "AL", "MS"),
topN=5,
printPlot=FALSE,
returnData=FALSE,
countyPopFloor=25000,
subT="Excludes counties with population <25k"
)
casesSE <- countyLevelEvolution(pvtCases_20200903,
burdenVar="cumCases",
inclStates=c("FL", "GA", "SC", "AL", "MS"),
topN=5,
printPlot=FALSE,
returnData=FALSE,
countyPopFloor=25000,
subT="Excludes counties with population <25k"
)
gridExtra::grid.arrange(casesSE, deathSE, nrow=1)
# Deaths and cases in LA-TX-AZ-NV with the same population floor
deathSC <- countyLevelEvolution(pvtDeaths_20200903,
burdenVar="cumDeaths",
inclStates=c("LA", "TX", "AZ", "NV"),
topN=5,
printPlot=FALSE,
returnData=FALSE,
countyPopFloor=25000,
subT="Excludes counties with population <25k"
)
casesSC <- countyLevelEvolution(pvtCases_20200903,
burdenVar="cumCases",
inclStates=c("LA", "TX", "AZ", "NV"),
topN=5,
printPlot=FALSE,
returnData=FALSE,
countyPopFloor=25000,
subT="Excludes counties with population <25k"
)
gridExtra::grid.arrange(casesSC, deathSC, nrow=1)
Cases appear to be leveling off in the hardest hit counties, but there has been rapid growth in both cases and deaths in some of the counties associated with the hardest hit regions.
Next, counties are plotted for both their total reported cases and deaths (each per million), with unassigned data excluded:
# Deaths and cases for ALL US counties (inclStates=NULL), data only
deathUS <- countyLevelEvolution(pvtDeaths_20200903,
burdenVar="cumDeaths",
inclStates=NULL,
topN=5,
printPlot=FALSE,
returnData=TRUE
)
casesUS <- countyLevelEvolution(pvtCases_20200903,
burdenVar="cumCases",
inclStates=NULL,
topN=5,
printPlot=FALSE,
returnData=TRUE
)
# Join the files so there is countyFIPS-county-population-date and cumCases cumDeaths
# Also, add the hierarchical (method="complete", 5 segments) segment for the associated state
# (clustVec is a state-named vector of cluster assignments defined earlier
# in the analysis — assumed in scope here)
burdenUS <- deathUS %>%
select(-countyName.x, -countyName.y, -bold, cumDeathPer=burden) %>%
inner_join(casesUS %>%
select(-countyName.x, -countyName.y, -bold, cumCasesPer=burden),
by=c("countyFIPS", "stateFIPS", "county", "state", "date", "population")
) %>%
mutate(cluster=clustVec[state])
# Plot the data as of September 1, 2020
# County-level deaths vs. cases per million, faceted by state cluster, with
# a population-weighted linear fit per cluster
burdenUS %>%
filter(date=="2020-09-01", population>=10000) %>%
ggplot(aes(x=cumCasesPer, y=cumDeathPer)) +
geom_point(aes(size=log10(population), color=factor(cluster)), alpha=0.25) +
geom_smooth(method="lm", se=FALSE, aes(weight=population, color=factor(cluster))) +
scale_size_continuous("log10\nCounty\nPop") +
labs(x="Cases per million as of Sep 1, 2020",
y="Deaths per million as of Sep 1, 2020",
title="County-level per-capita coronavirus burden as of Sep 1, 2020",
subtitle="Filtered to counties with population of at least 10000"
) +
facet_wrap(~cluster)
# Further, the total burden by cluster is plotted
# Two aggregations per cluster: the median of county-level per-capita values
# (mdn_*) and the overall segment-level per-capita rate (mean_*); the name is
# split on "_" into aggType (mdn/mean) and metType (CasesPer/DeathPer) so
# both views can be faceted side by side
burdenUS %>%
filter(date=="2020-09-01", population>=0) %>%
group_by(cluster) %>%
summarize(population=sum(population),
cumCases=sum(cumCases),
cumDeath=sum(cumDeaths),
mdn_CasesPer=median(cumCasesPer),
mdn_DeathPer=median(cumDeathPer)
) %>%
mutate(mean_CasesPer=1000000*cumCases/population, mean_DeathPer=1000000*cumDeath/population) %>%
select(cluster, starts_with("mdn"), starts_with("mean")) %>%
pivot_longer(-cluster) %>%
mutate(aggType=stringr::str_replace(name, "_.*", ""),
metType=stringr::str_replace(name, ".*_", "")
) %>%
pivot_wider(c(cluster, aggType), names_from="metType", values_from="value") %>%
ggplot(aes(x=CasesPer, y=DeathPer)) +
geom_point(aes(color=factor(cluster)), size=5) +
labs(x="Cases per million as of Sep 1, 2020",
y="Deaths per million as of Sep 1, 2020",
title="Cluster-level per-capita coronavirus burden as of Sep 1, 2020",
subtitle="State-level clusters based on hierarchical (method=`complete`)"
) +
scale_color_discrete("Cluster") +
ylim(c(0, NA)) +
xlim(c(0, NA)) +
facet_wrap(~c("mdn"="Median of all counties in segment", "mean"="Segment-level metric")[aggType])
There is a very different slope of deaths vs. cases that seems to be consistent across counties, particularly for the segment that was hit early and hard. There is a significant difference between the median county-level burden and the overall segment-level burden, suggesting that heavier outbreaks in larger counties may play a role. Notably, the southern states that were hit late have similar metrics whether reported as the median by county or in aggregate by segment, potentially suggesting a lesser population-driven impact in this segment.
With some modifications, the clusterStates() function can be used to cluster the county-level data:
The existing code that calls the kmeans clustering is copied below for reference:
# Test clusters that weight deaths heavily vs. cases and that weight shape more highly than total
# Using kmeans and testing for 1-10 clusters
# testCluster_km5 <- clusterStates(cvFilteredPerCapita,
# minShape=3,
# ratioDeathvsCase = 5,
# ratioTotalvsShape = 0.5,
# minDeath=100,
# minCase=10000,
# hierarchical=FALSE,
# nCenters=5,
# testCenters=1:10,
# iter.max=20,
# nstart=10,
# returnList=TRUE,
# seed=2008261400
# )
# Check how 5 clusters look
# clustVec_km5 <- testCluster_km5$objCluster$cluster
# Create the cluster assessments
# plotData_km5 <- assessClusters(clustVec_km5)
# STEP 1: Select only desired variables from burdenUS
# countyFIPS is renamed to 'state' so the existing state-level helper
# functions can be reused unchanged on county-level data
countyCumPerCapita <- burdenUS %>%
select(state=countyFIPS, date, cpm=cumCasesPer, dpm=cumDeathPer, population) %>%
arrange(state, date)
# STEP 1a: Confirm that there are no duplicates and that every county has the same dates
# This should be 1 provided that there are no duplicates
countyCumPerCapita %>%
count(state, date) %>%
pull(n) %>%
max()
## [1] 1
# This should have no standard deviation if the same number of records exist on every day
# (i.e., min and max of the daily record counts and population totals should be identical)
countyCumPerCapita %>%
mutate(n=1) %>%
group_by(date) %>%
summarize(n=sum(n), population=sum(population)) %>%
summarize_at(vars(all_of(c("n", "population"))), .funs=list(min=min, max=max))
## # A tibble: 1 x 4
## n_min population_min n_max population_max
## <dbl> <dbl> <dbl> <dbl>
## 1 3127 326429233 3127 326429233
# STEP 2: Convert to daily new totals rather than cumulative data
# The first record for each county keeps its cumulative value as that day's new total
countyDailyPerCapita <- countyCumPerCapita %>%
group_by(state) %>%
arrange(date) %>%
mutate_at(vars(all_of(c("cpm", "dpm"))), ~ifelse(row_number()==1, ., .-lag(.))) %>%
ungroup()
# STEP 2a: Add rolling 7 aggregates and total cases/deaths
countyDailyPerCapita <- countyDailyPerCapita %>%
arrange(state, date) %>%
group_by(state) %>%
helperRollingAgg(origVar="cpm", newName="cpm7", k=7) %>%
helperRollingAgg(origVar="dpm", newName="dpm7", k=7) %>%
ungroup() %>%
# Convert the per-million rates back to raw counts using county population
mutate(cases=cpm*population/1000000, deaths=dpm*population/1000000)
# STEP 3: Create the clusters based on a population-limited subset of countyDailyPerCapita
# Use only population >= 25000 and date in August or earlier
# Ensure that 'state' (which holds countyFIPS) is not summed as a double
countyFiltered <- countyDailyPerCapita %>%
filter(population >= 25000, date <= as.Date("2020-08-31")) %>%
mutate(state=as.character(state))
# Check number of counties that will fail the test for 100 deaths per million or 5000 cases per million
# Proportion of values in x that are exactly zero
is0 <- function(x) {
  sum(x == 0) / length(x)
}
# Proportion of values in x strictly below the threshold n
isltn <- function(x, n) {
  sum(x < n) / length(x)
}
# Convenience wrappers for the two burden thresholds of interest
islt100 <- function(x) {
  isltn(x, n = 100)
}
islt5000 <- function(x) {
  isltn(x, n = 5000)
}
# Share of counties (per-county total burden) falling below each candidate threshold
countyFiltered %>%
group_by(state) %>%
summarize_at(c("cpm", "dpm"), sum) %>%
ungroup() %>%
summarize_at(vars(all_of(c("cpm", "dpm"))),
.funs=list(mean_is0=is0, mean_lt100=islt100, mean_lt5000=islt5000)
)
## # A tibble: 1 x 6
## cpm_mean_is0 dpm_mean_is0 cpm_mean_lt100 dpm_mean_lt100 cpm_mean_lt5000
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0.0352 0 0.266 0.140
## # ... with 1 more variable: dpm_mean_lt5000 <dbl>
A significant portion of the data will fall below the total burden thresholds (25% of counties have under 100 deaths per million, 14% of counties have under 5000 cases per million). These will tend to cluster together in a “lower burden” cluster which may be desirable. Per previous, deaths is the primary variable of interest, with both absolute and relative (shape by month) burden considered important:
# Run county-level clusters
# ratioDeathvsCase=5 weights deaths 5x cases; ratioTotalvsShape=0.5 weights
# total burden at half of shape -- presumably per clusterStates()'s weighting scheme; confirm
countyCluster_km_test <- clusterStates(countyFiltered,
minShape=3,
ratioDeathvsCase = 5,
ratioTotalvsShape = 0.5,
minDeath=100,
minCase=5000,
hierarchical=FALSE,
nCenters=5,
testCenters=1:25,
iter.max=20,
nstart=10,
returnList=TRUE,
seed=2009081450
)
##
## Cluster means and counts
## 1 2 3 4 5
## . 501.00 473.00 53.00 199.00 365.00
## totalCases 0.69 0.32 1.23 1.14 0.52
## totalDeaths 1.36 0.36 9.67 4.76 1.86
## cases_3 0.01 0.02 0.08 0.02 0.02
## deaths_3 0.05 0.08 0.12 0.05 0.09
## cases_4 0.04 0.06 0.39 0.14 0.17
## deaths_4 0.35 0.54 2.07 0.85 1.45
## cases_5 0.05 0.08 0.17 0.14 0.17
## deaths_5 0.29 0.34 1.29 0.99 1.72
## cases_6 0.13 0.11 0.10 0.15 0.12
## deaths_6 0.36 0.52 0.62 0.70 0.73
## cases_7 0.39 0.27 0.14 0.32 0.24
## deaths_7 1.25 0.69 0.46 1.09 0.45
## cases_8 0.38 0.32 0.12 0.24 0.25
## deaths_8 2.61 0.58 0.43 1.32 0.51
There is no obvious break-point in the elbow plot or silhouette plot. Running with 5 clusters provides a segment with very high disease early, a segment with high disease late, two segments with mild-moderate disease (one early, one late), and one segment with low disease.
The assessClusters() function has been updated to (in a klunky manner) accept county-level data and produce basic charts:
# Check how 5 clusters look
clustVec_county_km_test <- countyCluster_km_test$objCluster$cluster
# dfState collapses to one population value per county; mean() assumes the
# population is constant per county across dates -- TODO confirm
plotData_county_km_test <- assessClusters(clustVec_county_km_test,
dfState=countyFiltered %>%
group_by(state) %>%
summarize(pop=mean(population)) %>%
ungroup(),
dfBurden=select(countyFiltered, -population),
thruLabel="Sep 3, 2020",
isCounty=TRUE,
plotsTogether=TRUE
)
##
## Recency is defined as 2020-08-02 through current
##
## Recency is defined as 2020-08-02 through current
A handful of counties with under 10% of the US population were hit very hard and early, with high per-capita death rates. While there have been surges in other counties, peaks in daily deaths per capita are roughly 25% or less of what the early-hard segment experienced.
Next steps are to further customize assessClusters() to allow for plotting the elements of county-level data (the line per county, as used with states, is messy and uninformative; an aggregate statistic such as 90% range is needed for sensible plotting).
The function has been updated, and is now incorporated in the results above. Next steps are to further explore the attributes of the county-level segments and impact of different numbers of segments.
Two preliminary steps will help for running multiple scenarios with county-level segments:
First, the helper function is created:
# Function for calling assessClusters() for county-level segments
helperAssessCountyClusters <- function(vecCluster,
                                       dfPop,
                                       dfBurden,
                                       dfPopGeoVar="state",
                                       dfPopPopVar="population",
                                       ...
                                       ) {
  # FUNCTION ARGUMENTS:
  # vecCluster: the named cluster vector
  # dfPop: the data frame containing the population data
  # dfBurden: the data frame containing the burden statistics by county and date
  # dfPopGeoVar: name of the geography column in dfPop
  # dfPopPopVar: name of the population column in dfPop
  # ...: other arguments to pass to assessClusters()
  # Collapse dfPop to a single population value per geography
  dfPopSummary <- dfPop %>%
    group_by_at(dfPopGeoVar) %>%
    summarize(pop=mean(get(dfPopPopVar))) %>%
    ungroup()
  # Run the assessment in county mode and return the resulting plot data
  assessClusters(vecCluster,
                 dfState=dfPopSummary,
                 dfBurden=select(dfBurden, -population),
                 isCounty=TRUE,
                 ...
                 )
}
# Test the function on the same data as before
helperACC_test <- helperAssessCountyClusters(countyCluster_km_test$objCluster$cluster,
dfPop=countyFiltered,
dfBurden=countyFiltered,
thruLabel="Sep 3, 2020",
plotsTogether=TRUE
)
##
## Recency is defined as 2020-08-02 through current
##
## Recency is defined as 2020-08-02 through current
# Confirm that outputs are the same
# (regression check: the wrapper must reproduce the earlier direct call exactly)
identical(plotData_county_km_test, helperACC_test)
## [1] TRUE
Next, an updated version of assessClusters() is written such that the components can be called individually on request rather than as a group:
# Helper function to make the overall cluster summary statistics
helperClusterSummaryPlots <- function(dfState,
dfPlot,
showMap,
clusterPlotsTogether,
weightedMean=TRUE,
mapLevel="states"
) {
# FUNCTION ARGUMENTS:
# dfState: contains the state/county-level data (one row per geography, with pop and cluster)
# dfPlot: contains the joined data for plotting (one row per geography-date)
# showMap: boolean for whether to create a map (if FALSE, segment membership counts are shown instead)
# clusterPlotsTogether: boolean, whether to put all four plots on the same page
# weightedMean: boolean, whether to create weighted mean by segment (if FALSE, median by segment is taken)
# mapLevel: the level to be used on the map ("states" or "counties")
# Reorder the cluster levels in dfState to match dfPlot
# Convert factor order to match dfPlot (can be reordered by argument to the calling function)
dfState <- dfState %>%
mutate(cluster=factor(cluster, levels=levels(dfPlot$cluster)))
# Plot the segments on a map or show segment membership
if (showMap) {
if (mapLevel=="counties") {
# usmap expects a 5-character FIPS code in a column named 'fips'
dfMap <- dfState %>%
mutate(fips=stringr::str_pad(state, width=5, side="left", pad="0")) %>%
select(fips, cluster)
} else {
dfMap <- dfState
}
# Create the map
p1 <- usmap::plot_usmap(regions=mapLevel, data=dfMap, values="cluster") +
scale_fill_discrete("cluster") +
theme(legend.position="right")
} else {
# No map requested: show counts of geographies per segment instead
p1 <- dfState %>%
count(cluster) %>%
ggplot(aes(x=fct_rev(cluster), y=n)) +
geom_col(aes(fill=cluster)) +
geom_text(aes(y=n/2, label=n)) +
coord_flip() +
labs(x="", y="# Counties", title="Membership by segment")
}
# Plot the population totals by segment (in millions)
p2 <- dfState %>%
group_by(cluster) %>%
summarize(pop=sum(pop)/1000000) %>%
ggplot(aes(x=fct_rev(cluster), y=pop)) +
geom_col(aes(fill=cluster)) +
geom_text(aes(y=pop/2, label=round(pop))) +
labs(y="2015 population (millions)", x="Cluster", title="Population by cluster (millions)") +
coord_flip()
# Plot the rolling 7-day mean daily disease burden by cluster
# The y value is either the median of all elements in the cluster or the
# population-weighted mean, depending on weightedMean
p3 <- dfPlot %>%
select(date, cluster, cases=cpm7, deaths=dpm7, pop) %>%
pivot_longer((-c(date, cluster, pop))) %>%
filter(!is.na(value)) %>%
group_by(date, cluster, name) %>%
summarize(mdnValue=median(value), wtdValue=sum(pop*value)/sum(pop)) %>%
ggplot(aes(x=date, y=if(weightedMean) wtdValue else mdnValue)) +
geom_line(aes(group=cluster, color=cluster)) +
facet_wrap(~name, scales="free_y") +
labs(x="",
y="Rolling 7-day mean, per million",
title="Rolling 7-day mean daily disease burden, per million",
subtitle=paste0(if(weightedMean) "Weighted mean" else "Median",
" per day for states assigned to cluster"
)
) +
scale_x_date(date_breaks="1 months", date_labels="%b")
# Plot the total cases and total deaths by cluster
# NOTE(review): varMapper is assumed to be a global name->label lookup vector
# for facet labels -- confirm it is defined upstream in this file
p4 <- dfPlot %>%
group_by(cluster) %>%
summarize(cases=sum(cases), deaths=sum(deaths)) %>%
pivot_longer(-cluster) %>%
ggplot(aes(x=fct_rev(cluster), y=value/1000)) +
geom_col(aes(fill=cluster)) +
geom_text(aes(y=value/2000, label=round(value/1000))) +
coord_flip() +
facet_wrap(~varMapper[name], scales="free_x") +
labs(x="Cluster", y="Burden (000s)", title="Total cases and deaths by segment")
# Place the plots together if clusterPlotsTogether is TRUE, otherwise just print
if (isTRUE(clusterPlotsTogether)) {
gridExtra::grid.arrange(p1, p2, p3, p4, nrow=2, ncol=2)
} else {
print(p1); print(p2); print(p3); print(p4)
}
}
# Helper function to make total and per capita by state (calls its own helper function)
helperCallTotalPerCapita <- function(dfPlot,
                                     thruLabel
                                     ) {
  # FUNCTION ARGUMENTS:
  # dfPlot: the plotting data frame
  # thruLabel: the date that data are through
  # Shared title for both charts
  chartTitle <- paste0("Coronavirus impact by state through ", thruLabel)
  # Local wrapper: bar chart of the two requested measures by state, colored by cluster
  makeBars <- function(numVars) {
    helperBarDeathsCases(dfPlot,
                         numVars=numVars,
                         title=chartTitle,
                         xVar=c("state"),
                         fillVar=c("cluster")
                         )
  }
  # Totals (cases/deaths), then per-capita (cpm/dpm)
  makeBars(c("cases", "deaths"))
  makeBars(c("cpm", "dpm"))
}
# Helper function to make recent vs. total plots
helperCallRecentvsTotal <- function(dfPlot,
                                    thruLabel,
                                    labelPoints,
                                    recentTotalTogether
                                    ) {
  # FUNCTION ARGUMENTS:
  # dfPlot: the plotting data frame
  # thruLabel: the date that data are through
  # labelPoints: boolean, whether to label the individual points
  # recentTotalTogether: boolean, whether to put these plots together on one page
  # Shared title for both scatter plots
  plotTitle <- paste0("Coronavirus burden through ", thruLabel)
  # Local wrapper: last-30 vs. total scatter for a given metric pair
  makeScatter <- function(xVar, yVar) {
    helperRecentvsTotal(dfPlot,
                        xVar=xVar,
                        yVar=yVar,
                        title=plotTitle,
                        labelPlot=labelPoints,
                        printPlot=FALSE
                        )
  }
  # Cases per million, then deaths per million
  pCases <- makeScatter("cpm", "newcpm")
  pDeaths <- makeScatter("dpm", "newdpm")
  # Print the plots either as a single page or separately
  if (isTRUE(recentTotalTogether)) {
    gridExtra::grid.arrange(pCases, pDeaths, nrow=1)
  } else {
    print(pCases); print(pDeaths)
  }
}
# Helper function to create total vs. elements plots
helperCallTotalvsElements <- function(dfPlot,
                                      aggAndTotal,
                                      clusterAggTogether,
                                      ...
                                      ) {
  # FUNCTION ARGUMENTS:
  # dfPlot: the plotting data frame
  # aggAndTotal: boolean, should each individual line be plotted (if FALSE an 80% CI is plotted instead)
  # clusterAggTogether: boolean, whether to print the plots all on a single page
  # ...: any other arguments to pass to helperTotalvsElements (especially pctRibbon or aggFunc)
  # Local wrapper: one helperTotalvsElements() call; fixedScale=TRUE forces a
  # fixed y-scale, FALSE leaves helperTotalvsElements()'s default facet scale
  makeOne <- function(keyVar, title, fixedScale, ...) {
    if (fixedScale) {
      helperTotalvsElements(dfPlot,
                            keyVar=keyVar,
                            aggAndTotal=aggAndTotal,
                            title=title,
                            facetScales="fixed",
                            printPlot=FALSE,
                            ...
                            )
    } else {
      helperTotalvsElements(dfPlot,
                            keyVar=keyVar,
                            aggAndTotal=aggAndTotal,
                            title=title,
                            printPlot=FALSE,
                            ...
                            )
    }
  }
  caseTitle <- "Cases per million, 7-day rolling mean"
  deathTitle <- "Deaths per million, 7-day rolling mean"
  # Cases per million on the default and fixed y-scales
  pCaseFree <- makeOne("cpm7", caseTitle, fixedScale=FALSE, ...)
  pCaseFixed <- makeOne("cpm7", caseTitle, fixedScale=TRUE, ...)
  # Deaths per million on the default and fixed y-scales
  pDeathFree <- makeOne("dpm7", deathTitle, fixedScale=FALSE, ...)
  pDeathFixed <- makeOne("dpm7", deathTitle, fixedScale=TRUE, ...)
  # Consolidate by scale type on shared pages, or print each separately
  if (isTRUE(clusterAggTogether)) {
    gridExtra::grid.arrange(pCaseFree, pDeathFree, nrow=1)
    gridExtra::grid.arrange(pCaseFixed, pDeathFixed, nrow=1)
  } else {
    print(pCaseFree); print(pCaseFixed); print(pDeathFree); print(pDeathFixed)
  }
}
# Updated function for cluster assessment
# Main function creates the required data and calls helper functions
# 1. Call helper to create overall cluster summary
# 2. Call helper function for totals and per capita by state
# 3. Call helper for recent vs. overall cases
# 4. Call helper for evolution of segment aggregate and individual components
assessClusters <- function(clusters,
                           dfState=stateData,
                           dfBurden=cvFilteredPerCapita,
                           thruLabel="Aug 20, 2020",
                           isCounty=FALSE,
                           plotsTogether=FALSE,
                           clusterPlotsTogether=plotsTogether,
                           recentTotalTogether=plotsTogether,
                           clusterAggTogether=plotsTogether,
                           makeSummaryPlots=TRUE,
                           makeTotalvsPerCapitaPlots=!isCounty,
                           makeRecentvsTotalPlots=TRUE,
                           makeTotalvsElementPlots=TRUE,
                           showMap=!isCounty,
                           orderCluster=FALSE
                           ) {
  # FUNCTION ARGUMENTS:
  # clusters: the named vector containing the clusters by state
  # dfState: the file containing the states and populations
  # dfBurden: the data containing the relevant per capita burden statistics by state-date
  # thruLabel: label for plots for 'data through'
  # isCounty: boolean, is this a plot of county-level data that have been named 'state'?
  #   FALSE means that it is normal state-level data
  # plotsTogether: boolean, should plots be consolidated on fewer pages?
  #   (serves as the default for the three consolidation flags below)
  # clusterPlotsTogether: boolean, should plots p1-p4 be consolidated?
  # recentTotalTogether: boolean, should recent total plots p7-p8 be consolidated?
  # clusterAggTogether: boolean, should aggregate plots p9/p11 and p10/p12 be consolidated?
  # makeSummaryPlots: boolean, should the summary plots be made?
  # makeTotalvsPerCapitaPlots: boolean, should the total and per capita plots be produced?
  # makeRecentvsTotalPlots: boolean, should the recent vs. total plots be created?
  # makeTotalvsElementPlots: boolean, should the total vs. element plots be created?
  # showMap: boolean, whether to create a map colored by cluster (will show segment counts otherwise)
  # orderCluster: boolean, whether to order the clusters based on disease burden
  # RETURNS: the joined plotting data frame (cluster attached to each burden record)
  # Attach the clusters to the state population data
  dfState <- as.data.frame(clusters) %>%
    set_names("cluster") %>%
    rownames_to_column("state") %>%
    inner_join(dfState, by="state") %>%
    mutate(cluster=factor(cluster))
  # Join the burden data so each state/county-date record carries its cluster
  dfPlot <- dfState %>%
    inner_join(dfBurden, by="state") %>%
    tibble::as_tibble()
  # Reorder the clusters if requested
  if (orderCluster) {
    dfPlot <- changeOrderLabel(dfPlot, grpVars="state")
  }
  # Call the helper function to make the overall summary statistics
  if (makeSummaryPlots) {
    helperClusterSummaryPlots(dfState=dfState,
                              dfPlot=dfPlot,
                              showMap=showMap,
                              clusterPlotsTogether=clusterPlotsTogether,
                              mapLevel=if(isCounty) "counties" else "states"
                              )
  }
  # These are primarily useful for state-level data and default to FALSE when isCounty is TRUE
  if (makeTotalvsPerCapitaPlots) {
    helperCallTotalPerCapita(dfPlot=dfPlot, thruLabel=thruLabel)
  }
  # Call the helper function to make recent vs. total plots
  if (makeRecentvsTotalPlots) {
    helperCallRecentvsTotal(dfPlot=dfPlot,
                            thruLabel=thruLabel,
                            labelPoints=!isCounty,
                            recentTotalTogether=recentTotalTogether
                            )
  }
  # Call the total vs. elements helper function
  # BUG FIX: previously passed clusterAggTogether=clusterPlotsTogether, which
  # silently ignored the caller's clusterAggTogether argument
  if (makeTotalvsElementPlots) {
    helperCallTotalvsElements(dfPlot=dfPlot,
                              aggAndTotal=!isCounty,
                              clusterAggTogether=clusterAggTogether
                              )
  }
  # Return the plotting data frame
  dfPlot
}
The updated function is then run on the existing data:
# Test the function on the same data as before
helperACC_test_02 <- helperAssessCountyClusters(countyCluster_km_test$objCluster$cluster,
dfPop=countyFiltered,
dfBurden=countyFiltered,
thruLabel="Sep 3, 2020",
plotsTogether=TRUE
)
##
## Recency is defined as 2020-08-02 through current
##
## Recency is defined as 2020-08-02 through current
# Confirm that outputs are the same
# (regression check that the rewritten assessClusters() matches the original)
identical(plotData_county_km_test, helperACC_test_02)
## [1] TRUE
The updated function is tested for producing only the main summary, and for creating a county-level plot:
# Test the function on the same data as before, but for only the overall summary
# (all three optional plot families are disabled)
helperACC_test_03 <- helperAssessCountyClusters(countyCluster_km_test$objCluster$cluster,
dfPop=countyFiltered,
dfBurden=countyFiltered,
thruLabel="Sep 3, 2020",
plotsTogether=TRUE,
makeTotalvsPerCapitaPlots=FALSE,
makeRecentvsTotalPlots=FALSE,
makeTotalvsElementPlots=FALSE
)
# Test the function on the same data as before, with the plots each on their own page
# (showMap=TRUE draws the county map; clusterPlotsTogether=FALSE prints p1-p4 separately)
helperACC_test_04 <- helperAssessCountyClusters(countyCluster_km_test$objCluster$cluster,
dfPop=countyFiltered,
dfBurden=countyFiltered,
thruLabel="Sep 3, 2020",
plotsTogether=TRUE,
makeTotalvsPerCapitaPlots=FALSE,
makeRecentvsTotalPlots=FALSE,
makeTotalvsElementPlots=FALSE,
showMap=TRUE,
clusterPlotsTogether=FALSE
)
The functions are now prepared to more easily handle multiple scenarios of clustering, which is the next step.
Neither the elbow plot nor the silhouette plot had any obvious break points suggesting an optimal number of clusters. Suppose that kmeans from 2-8 clusters is run, with the assessment being only the single-page summary:
# Clustering amounts to test
testN <- 2:8
# Create a container to hold the clustering object
county_km_list <- vector("list", length(testN))
names(county_km_list) <- paste0("nCenters", stringr::str_pad(testN, width=2, side="left", pad="0"))
# Run county-level clusters for each element in testN
# Index with seq_along() rather than a manually-incremented counter, which was
# error-prone (the counter and loop variable could drift apart if edited)
for (nRun in seq_along(testN)) {
  county_km_list[[nRun]] <- clusterStates(countyFiltered,
                                          minShape=3,
                                          ratioDeathvsCase = 5,
                                          ratioTotalvsShape = 0.5,
                                          minDeath=100,
                                          minCase=5000,
                                          hierarchical=FALSE,
                                          nCenters=testN[nRun],
                                          iter.max=20,
                                          nstart=10,
                                          returnList=TRUE,
                                          seed=2009111335
                                          )
}
##
## Cluster means and counts
## 1 2
## . 233.00 1358.00
## totalCases 1.10 0.54
## totalDeaths 6.02 1.17
## cases_3 0.03 0.01
## deaths_3 0.07 0.07
## cases_4 0.23 0.08
## deaths_4 1.29 0.69
## cases_5 0.16 0.09
## deaths_5 1.22 0.67
## cases_6 0.12 0.12
## deaths_6 0.68 0.52
## cases_7 0.25 0.31
## deaths_7 0.77 0.87
## cases_8 0.20 0.32
## deaths_8 0.97 1.35
##
## Cluster means and counts
## 1 2 3
## . 590.00 188.00 813.00
## totalCases 0.72 1.14 0.42
## totalDeaths 1.55 6.54 1.04
## cases_3 0.01 0.04 0.02
## deaths_3 0.04 0.08 0.09
## cases_4 0.04 0.24 0.11
## deaths_4 0.35 1.38 0.95
## cases_5 0.06 0.17 0.12
## deaths_5 0.30 1.25 0.97
## cases_6 0.13 0.12 0.11
## deaths_6 0.38 0.69 0.63
## cases_7 0.39 0.24 0.25
## deaths_7 1.27 0.77 0.58
## cases_8 0.37 0.19 0.29
## deaths_8 2.50 0.83 0.52
##
## Cluster means and counts
## 1 2 3 4
## . 357.00 530.00 581.00 123.00
## totalCases 0.69 0.74 0.33 1.25
## totalDeaths 2.69 1.56 0.44 7.68
## cases_3 0.02 0.01 0.02 0.04
## deaths_3 0.06 0.04 0.10 0.08
## cases_4 0.18 0.04 0.07 0.26
## deaths_4 1.27 0.34 0.74 1.42
## cases_5 0.19 0.06 0.09 0.16
## deaths_5 1.72 0.28 0.51 1.13
## cases_6 0.12 0.13 0.11 0.12
## deaths_6 0.79 0.36 0.53 0.65
## cases_7 0.24 0.39 0.27 0.24
## deaths_7 0.56 1.30 0.64 0.81
## cases_8 0.24 0.37 0.31 0.18
## deaths_8 0.60 2.61 0.60 0.91
##
## Cluster means and counts
## 1 2 3 4 5
## . 196.00 489.00 353.00 500.00 53.00
## totalCases 1.14 0.32 0.53 0.69 1.23
## totalDeaths 4.77 0.38 1.92 1.35 9.67
## cases_3 0.02 0.02 0.03 0.01 0.08
## deaths_3 0.05 0.08 0.09 0.05 0.12
## cases_4 0.13 0.06 0.18 0.04 0.39
## deaths_4 0.82 0.55 1.47 0.35 2.07
## cases_5 0.14 0.08 0.17 0.05 0.17
## deaths_5 0.97 0.36 1.76 0.29 1.29
## cases_6 0.15 0.11 0.11 0.13 0.10
## deaths_6 0.70 0.55 0.71 0.36 0.62
## cases_7 0.33 0.27 0.23 0.39 0.14
## deaths_7 1.10 0.69 0.44 1.26 0.46
## cases_8 0.24 0.32 0.25 0.38 0.12
## deaths_8 1.36 0.58 0.50 2.61 0.43
##
## Cluster means and counts
## 1 2 3 4 5 6
## . 483.00 337.00 37.00 283.00 148.00 303.00
## totalCases 0.32 0.53 1.27 0.94 1.11 0.56
## totalDeaths 0.38 1.91 10.65 2.29 5.57 0.97
## cases_3 0.02 0.03 0.07 0.01 0.03 0.01
## deaths_3 0.08 0.09 0.12 0.03 0.07 0.06
## cases_4 0.06 0.18 0.41 0.04 0.19 0.04
## deaths_4 0.56 1.51 2.06 0.33 1.16 0.40
## cases_5 0.08 0.18 0.17 0.07 0.16 0.05
## deaths_5 0.36 1.80 1.28 0.40 1.21 0.26
## cases_6 0.11 0.11 0.09 0.16 0.13 0.10
## deaths_6 0.54 0.70 0.65 0.50 0.72 0.31
## cases_7 0.27 0.23 0.14 0.42 0.27 0.36
## deaths_7 0.70 0.40 0.45 1.76 0.87 0.82
## cases_8 0.32 0.25 0.12 0.31 0.21 0.42
## deaths_8 0.57 0.47 0.43 1.98 0.99 2.97
##
## Cluster means and counts
## 1 2 3 4 5 6 7
## . 40.00 167.00 326.00 297.00 282.00 160.00 319.00
## totalCases 1.26 1.13 0.76 0.63 0.58 0.35 0.27
## totalDeaths 10.42 5.31 1.62 1.24 1.99 1.03 0.19
## cases_3 0.08 0.02 0.01 0.01 0.02 0.04 0.02
## deaths_3 0.12 0.06 0.04 0.05 0.04 0.20 0.08
## cases_4 0.42 0.17 0.05 0.04 0.16 0.16 0.05
## deaths_4 2.11 1.08 0.41 0.35 0.93 2.42 0.27
## cases_5 0.17 0.15 0.07 0.05 0.20 0.09 0.07
## deaths_5 1.30 1.09 0.41 0.25 2.01 0.70 0.24
## cases_6 0.09 0.14 0.17 0.10 0.12 0.09 0.09
## deaths_6 0.63 0.68 0.62 0.30 0.99 0.36 0.31
## cases_7 0.13 0.29 0.39 0.37 0.23 0.25 0.26
## deaths_7 0.43 0.92 1.87 0.79 0.47 0.39 0.46
## cases_8 0.11 0.22 0.30 0.42 0.25 0.28 0.33
## deaths_8 0.41 1.16 1.59 3.12 0.52 0.55 0.51
##
## Cluster means and counts
## 1 2 3 4 5 6 7 8
## . 285.00 296.00 148.00 278.00 162.00 21.00 283.00 118.00
## totalCases 0.26 0.59 0.35 0.59 1.12 1.43 0.61 1.14
## totalDeaths 0.17 1.03 1.13 2.11 3.39 12.37 1.11 6.42
## cases_3 0.02 0.01 0.04 0.02 0.01 0.08 0.01 0.04
## deaths_3 0.09 0.04 0.21 0.04 0.03 0.15 0.05 0.07
## cases_4 0.06 0.05 0.17 0.17 0.05 0.40 0.04 0.24
## deaths_4 0.28 0.47 2.52 0.97 0.45 1.98 0.35 1.39
## cases_5 0.07 0.08 0.09 0.21 0.08 0.16 0.05 0.17
## deaths_5 0.25 0.41 0.72 2.07 0.49 1.21 0.25 1.26
## cases_6 0.09 0.16 0.09 0.12 0.16 0.11 0.10 0.13
## deaths_6 0.25 0.74 0.36 0.92 0.52 0.73 0.30 0.71
## cases_7 0.25 0.36 0.25 0.22 0.41 0.14 0.37 0.24
## deaths_7 0.30 1.78 0.34 0.46 1.50 0.49 0.81 0.76
## cases_8 0.33 0.32 0.27 0.25 0.29 0.11 0.42 0.18
## deaths_8 0.48 1.33 0.50 0.50 2.01 0.44 3.10 0.81
# Takes a list and creates the desired plot
assessFunction <- function(x) {
  # FUNCTION ARGUMENTS:
  # x: one element of county_km_list (a clusterStates() return list)
  # Pull the cluster assignment vector and build the one-page summary only
  # (all optional plot families and the map are disabled)
  clusterAssignments <- x$objCluster$cluster
  helperAssessCountyClusters(clusterAssignments,
                             dfPop=countyFiltered,
                             dfBurden=countyFiltered,
                             thruLabel="Sep 3, 2020",
                             plotsTogether=TRUE,
                             makeTotalvsPerCapitaPlots=FALSE,
                             makeRecentvsTotalPlots=FALSE,
                             makeTotalvsElementPlots=FALSE,
                             showMap=FALSE
                             )
}
# Run the single-page assessment for each element of county_km_list
# Output will be meaningless; run just for the side-effects (plotting)
# lapply() replaces sapply(): the result is discarded anyway, and sapply()'s
# automatic simplification makes its return type unpredictable in scripts
discardThis <- lapply(county_km_list, FUN=assessFunction)
While k-means results are highly dependent on starting conditions (seed), some general trends are observed:
Five segments appears to be a sweet spot qualitatively:
Of interest, there does not appear to be a segment with late and high deaths. The small number of populous counties hit early tend to be the primary locations with the highest death rates.
Suppose that two additional plots are created to show cumulative disease burden per capita by segment:
# Test the function on the same data as before, with the plots each on their own page
# NOTE(review): this reassigns helperACC_test_04, overwriting the earlier object
# of the same name (here with clusterPlotsTogether=TRUE instead of FALSE)
helperACC_test_04 <- helperAssessCountyClusters(countyCluster_km_test$objCluster$cluster,
dfPop=countyFiltered,
dfBurden=countyFiltered,
thruLabel="Sep 3, 2020",
plotsTogether=TRUE,
makeTotalvsPerCapitaPlots=FALSE,
makeRecentvsTotalPlots=FALSE,
makeTotalvsElementPlots=FALSE,
showMap=TRUE,
clusterPlotsTogether=TRUE
)
# Cumulative per-capita burden by segment over time
helperACC_test_04 %>%
select(cluster, date, pop, cases, deaths) %>%
group_by(cluster, date) %>%
summarize_if(is.numeric, sum, na.rm=TRUE) %>%
# After summarize, the result stays grouped by cluster, so cumsum() below is per cluster
arrange(date) %>%
mutate(cpmcum=cumsum(cases)*1000000/pop, dpmcum=cumsum(deaths)*1000000/pop) %>%
ungroup() %>%
select(cluster, date, cases=cpmcum, deaths=dpmcum) %>%
pivot_longer(-c(cluster, date)) %>%
ggplot(aes(x=date, y=value, color=cluster)) +
geom_line(size=1) +
# Label each line with its final value, placed just beyond the last date
geom_text(data=~filter(., date==max(date)),
aes(x=date+lubridate::days(2), label=round(value)),
size=3,
hjust=0
) +
labs(x="", title="Cumulative burden per million people by segment", y="") +
facet_wrap(~c("cases"="Cases per million", "deaths"="Deaths per million")[name], scales="free_y") +
scale_x_date(date_breaks="1 months", date_labels="%b", expand=expand_scale(c(0, 0.1)))
The segments have had very different experiences with the disease, ranging from 100 deaths per million in counties covering ~20% of US population to 1000-2000 deaths per million in counties covering ~15% and ~10% respectively of US population. The remaining counties, covering ~60% of US population, have ~400 deaths per million.
How well do the county segments explain some of the differences observed by state? For example, NY/NJ had heavy and early disease, LA has had sustained disease, and FL/GA has had moderate and late disease:
# State combinations of interest: the Northeast, Louisiana, and part of the Southeast
stateCombos <- list(c("NY", "NJ", "CT", "RI", "VT", "MA", "NH", "PA"), c("LA"), c("FL", "GA", "AL"))
# Build a county FIPS -> cluster lookup (FIPS padded to 5 characters for usmap)
fipsCounty <- helperACC_test_04 %>%
mutate(state=stringr::str_pad(state, width=5, side="left", pad="0")) %>%
count(state, cluster) %>%
select(fips=state, cluster)
# Map the county clusters for each state combination
# limits=c(1:5) keeps all five cluster levels in the legend even if absent from a map
for (x in stateCombos) {
p1 <- usmap::plot_usmap(regions="counties", include=x, values="cluster", data=fipsCounty) +
scale_fill_discrete("Cluster", limits=c(1:5)) +
labs(title=paste0("County clusters for states: ", paste0(x, collapse=", ")))
print(p1)
}
Next steps are to explore the evolution of deaths by state and cluster.
Suppose that NYS is examined for coronavirus burden by segment:
# Helper function for pretty labeling and position of bar chart labels
helperTextLabel <- function(vrbl,
groupVar=NULL,
groupValue=NULL,
divBy=1,
roundTo=0,
pctMin=0.1,
pctOut=0.025,
...
) {
# FUNCTION ARGUMENTS
# vrbl: the variable to be plotted (as a string; resolved with get() in the plot data)
# groupVar: the grouping variable for deciding whether to use middle of the bar or outside
# NULL means no grouping variable
# groupValue: the value to be used for groupVar
# divBy: the amount to divide the variable by (e.g., 1000 means y-axis will be in 000s)
# roundTo: the amount to round the displayed totals to
# pctMin: any amount above this will be centered, any amount below this will be outside
# pctOut: amounts on the outside will be this amount of maximum outside
# ...: any other arguments to pass "as is" to geom_text
# RETURNS: a geom_text layer to add to an existing ggplot
# Create a relevant geom_text object
# 1. label should be vrbl/divBy, rounded to roundTo and using commas as needed
# 2. y position should be centered midpoint OR left-justified end of bar
# The data argument is a formula (lambda on the plot data): filter to the
# requested group when groupVar is supplied, otherwise use the data unchanged
geom_text(data=if(!is.null(groupVar)) ~filter(., get(groupVar) %in% groupValue) else ~.,
aes(y=ifelse(get(vrbl)>=pctMin*max(get(vrbl)),
get(vrbl)/2/divBy,
get(vrbl)/divBy + pctOut*max(get(vrbl))/divBy
),
label=scales::comma(get(vrbl)/divBy, accuracy=10**-roundTo),
# NA hjust: presumably falls back to the default (centered) placement
# for bars tall enough to hold the label -- confirm with ggplot2 docs
hjust=ifelse(get(vrbl)>=pctMin*max(get(vrbl)), NA, 0)
),
...
)
}
# Create an appropriate database for analysis
# Get the clusters, including adding the unassigned cluster
# Counties never clustered (filtered out upstream) receive the sentinel cluster 999
# NOTE(review): the joins omit 'by' and rely on name matching (hence the messages below)
fullClusterData <- helperACC_test_04 %>%
filter(date==max(date)) %>%
select(fipsCounty=state, cluster) %>%
mutate(fipsCounty=stringr::str_pad(fipsCounty, width=5, side="left", pad="0")) %>%
right_join(select(usmap::countypop, fipsCounty=fips, state=abbr, countyName=county, pop=pop_2015)) %>%
mutate(cluster=factor(ifelse(is.na(cluster), 999, cluster)))
## Joining, by = "fipsCounty"
# Merge in the disease data
# right_join keeps every county in fullClusterData, with NA burden where no data exist
fullStateData <- countyDailyPerCapita %>%
mutate(fipsCounty=stringr::str_pad(state, width=5, side="left", pad="0")) %>%
select(-state, -population) %>%
right_join(fullClusterData)
## Joining, by = "fipsCounty"
# Function to summarize cluster-level disease data for a given set of states
stateCountySummary <- function(states,
                               df=fullStateData,
                               keyDate=as.Date("2020-08-31"),
                               showQuadrants=TRUE,
                               showCumulative=FALSE,
                               facetCumulativeByState=FALSE,
                               showAllFactorLevels=FALSE,
                               returnData=FALSE
                               ) {
  # FUNCTION ARGUMENTS:
  # states: the states to include in the analysis
  # df: the data frame or tibble to use
  # keyDate: date to use for getting cluster and population by county
  # showQuadrants: boolean, whether to create and show the four-quadrants of p1-p4
  # showCumulative: boolean, whether to show the cumulative deaths per capita by segment
  # facetCumulativeByState: boolean, whether to facet by state for the cumulative plot
  # showAllFactorLevels: boolean, whether to show all factor levels in the plot legend
  # returnData: boolean, should the data frame be returned?
  #
  # Create a base data frame; cumulative deaths are accumulated per county
  # before the state filter (filtering afterward does not change the
  # per-county running totals)
  baseStateData <- df %>%
    arrange(fipsCounty, date) %>%
    group_by(fipsCounty) %>%
    mutate(cumDeaths=cumsum(deaths)) %>%
    filter(state %in% c(states))
  # Create a frame by state and cluster (dpm7 is population-weighted)
  stateClusterData <- baseStateData %>%
    group_by(state, cluster, date) %>%
    summarize(n=n(), dpm7=sum(dpm7*pop)/sum(pop), cumDeaths=sum(cumDeaths), pop=sum(pop)) %>%
    ungroup()
  # Create a frame by cluster only
  clusterData <- baseStateData %>%
    group_by(cluster, date) %>%
    summarize(n=n(), dpm7=sum(dpm7*pop)/sum(pop), cumDeaths=sum(cumDeaths), pop=sum(pop)) %>%
    ungroup()
  # Create and show p1-p4 if requested
  if (showQuadrants) {
    # Plot 1: Number of counties in cluster
    p1 <- clusterData %>%
      filter(date==keyDate) %>%
      ggplot(aes(x=fct_rev(cluster), y=n, fill=cluster)) +
      geom_col() +
      geom_text(aes(y=n/2, label=n)) +
      coord_flip() +
      labs(x="Cluster", y="# Counties", title="Counties by Cluster")
    if (showAllFactorLevels) p1 <- p1 + scale_fill_discrete(drop=FALSE)
    # Plot 2: Total population and average population by cluster
    p2 <- clusterData %>%
      filter(date==keyDate) %>%
      mutate(popPer=pop/n) %>%
      select(cluster, pop, popPer) %>%
      pivot_longer(-cluster) %>%
      ggplot(aes(x=fct_rev(cluster), y=value/1000, fill=cluster)) +
      geom_col() +
      helperTextLabel("value", divBy=1000, groupVar="name", groupValue="popPer", pctMin=0.2, size=3.5) +
      helperTextLabel("value", divBy=1000, groupVar="name", groupValue="pop", pctMin=0.2, size=3.5) +
      coord_flip() +
      facet_wrap(~c("pop"="Population", "popPer"="Population per county")[name], scales="free_x") +
      labs(x="Cluster", y="Population (000)", title="Population by Cluster")
    if (showAllFactorLevels) p2 <- p2 + scale_fill_discrete(drop=FALSE)
    # Plot 3: Deaths and deaths per million (total) by cluster
    p3 <- clusterData %>%
      filter(date==keyDate) %>%
      mutate(dpm=1000000*cumDeaths/pop) %>%
      select(cluster, cumDeaths, dpm) %>%
      pivot_longer(-cluster) %>%
      ggplot(aes(x=fct_rev(cluster), y=value, fill=cluster)) +
      geom_col() +
      helperTextLabel("value", groupVar="name", groupValue="cumDeaths", pctMin=0.2, size=3.5) +
      helperTextLabel("value", groupVar="name", groupValue="dpm", pctMin=0.2, size=3.5) +
      coord_flip() +
      facet_wrap(~c("cumDeaths"="1. Total deaths", "dpm"="2. Deaths per million")[name],
                 scales="free_x"
                 ) +
      labs(x="Cluster", y="", title="Deaths (total and per million) by cluster")
    if (showAllFactorLevels) p3 <- p3 + scale_fill_discrete(drop=FALSE)
    # Plot 4: Evolution of deaths by time
    p4 <- clusterData %>%
      filter(!is.na(dpm7)) %>%
      ggplot(aes(x=date, y=dpm7, group=cluster, color=cluster)) +
      geom_line(size=1) +
      labs(x="", y="Deaths per million", title="Deaths per million, 7-day rolling mean") +
      scale_x_date(date_breaks="1 months", date_labels="%b")
    if (showAllFactorLevels) p4 <- p4 + scale_color_discrete(drop=FALSE)
    # Create a 2x2 summary of the cluster data
    gridExtra::grid.arrange(p1, p2, p3, p4, nrow=2, top=paste0("States: ", paste0(states, collapse=", ")))
  }
  # If requested, create a cumulative deaths per capita curve
  if (showCumulative) {
    p5 <- (if (facetCumulativeByState) stateClusterData else clusterData) %>%
      mutate(cumdpm=1000000*cumDeaths/pop) %>%
      ggplot(aes(x=date, y=cumdpm)) +
      geom_line(aes(group=cluster, color=cluster), lwd=1) +
      geom_text(data=~filter(., date==max(date, na.rm=TRUE)),
                aes(x=date+lubridate::days(3), label=scales::comma(cumdpm, accuracy=1), color=cluster),
                size=3,
                hjust=0
                ) +
      labs(x="",
           y="Cumulative deaths per million",
           title=paste0("Cumulative deaths per million for: ", paste0(states, collapse=", "))
           ) +
      scale_x_date(date_breaks="1 months", date_labels="%b", expand=expand_scale(c(0, 0.15)))
    if (facetCumulativeByState) p5 <- p5 + facet_wrap(~state)
    if (showAllFactorLevels) p5 <- p5 + scale_color_discrete(drop=FALSE)
    print(p5)
  }
  # Return the data file if requested (by state-cluster if facetCumulativeByState, by cluster otherwise)
  if (returnData) {
    # BUG FIX: the else branch previously returned stateClusterData as well,
    # so the by-cluster frame was never returned despite the comment above
    if (facetCumulativeByState) stateClusterData else clusterData
  }
}
# Run the quadrant summary for NY
stateCountySummary(states=c("NY"))
# Run for FL-GA
stateCountySummary(states=c("FL", "GA"))
# Run for LA-MS-AL
stateCountySummary(states=c("LA", "MS", "AL"))
# Run for East North Central region
# NOTE(review): 'states' is passed positionally here, unlike the calls above
stateCountySummary(usmap::.east_north_central)
The population differences by segment are consistent with the general trends observed by state. Next steps are to add the cumulative deaths per million curve.
The function has been updated to optionally show cumulative deaths by segment:
# Create cumulative for NY (single state, so no state faceting needed)
stateCountySummary(states=c("NY"), showQuadrants=FALSE, showCumulative=TRUE)
# Create cumulative for FL-GA, faceted by state
stateCountySummary(states=c("FL", "GA"),
showQuadrants=FALSE,
showCumulative=TRUE,
facetCumulativeByState=TRUE
)
# Create cumulative for LA-MS-AL, faceted by state
stateCountySummary(states=c("LA", "MS", "AL"),
showQuadrants=FALSE,
showCumulative=TRUE,
facetCumulativeByState = TRUE
)
# Run for East North Central region, faceted by state
stateCountySummary(states=usmap::.east_north_central,
showQuadrants=FALSE,
showCumulative=TRUE,
facetCumulativeByState = TRUE
)
Next steps are to control the color palette so the same colors are used even when a level of the factor is not present in the data.
The option drop=FALSE inside the scale_*_discrete(drop=FALSE) appears to achieve this. Further, it would be useful to convert the levels of the factor so that they consistently go from most to least disease on a cumulative basis. The refactoring can be performed as:
# Function to reorder and relabel a factor from highest to lowest
# population-weighted disease burden
changeOrderLabel <- function(df,
                             fctVar="cluster",
                             grpVars=c(),
                             burdenVar="dpm",
                             wgtVar="pop",
                             exclfct="999"
                             ) {
  # FUNCTION ARGUMENTS
  # df: the data frame
  # fctVar: the factor variable to reorder
  # grpVars: the variable(s) that the data are currently at (e.g., "fipsCounty" for county-level in df)
  # burdenVar: the disease burden variable for sorting
  # wgtVar: the weight variable for sorting
  # exclfct: the factor level to be excluded from analysis
  #
  # General approach
  # 1. Data are aggregated to c(fctVar, grpVars) as x=sum(burdenVar*wgtVar) and y=mean(wgtVar)
  # 2. Data are aggregated to fctVar as z=sum(x)/sum(y)
  # 3. Factors are reordered from high to low on z, with the excluded factor added back last (if it exists)
  #
  # Check whether the excluded level actually exists in the factor variable
  fctDummy <- exclfct %in% levels(df[, fctVar, drop=TRUE])
  # Create the summary of impact by segment; the .data pronoun is used instead
  # of get() so the string arguments explicitly resolve to columns of df
  newLevels <- df %>%
    filter(.data[[fctVar]] != exclfct) %>%
    group_by_at(vars(all_of(c(fctVar, grpVars)))) %>%
    summarize(x=sum(.data[[burdenVar]]*.data[[wgtVar]]), y=mean(.data[[wgtVar]])) %>%
    group_by_at(vars(all_of(fctVar))) %>%
    summarize(z=sum(x)/sum(y)) %>%
    arrange(desc(z)) %>%
    pull(fctVar) %>%
    as.character()
  # Add back the excluded dummy level at the end (if it exists)
  if (fctDummy) newLevels <- c(newLevels, exclfct)
  # Reassign the levels in df and return the modified frame
  df %>%
    mutate(!!fctVar:=factor(.data[[fctVar]], levels=newLevels, labels=newLevels))
}
And, the plots can use all factor levels by enforcing drop=FALSE inside an appropriate scale_color_* or scale_fill_* object; this has been applied to each plot created in the function above.
And, the evaluation process can be run using the updated functions:
# Run for select upper midwest states, with clusters reordered by burden and
# all factor levels retained so plot colors stay consistent across runs
stateCountySummary(states=c("MN", "ND", "SD", "WI"),
df=changeOrderLabel(fullStateData, grpVars="fipsCounty"),
showQuadrants=TRUE,
showCumulative=TRUE,
facetCumulativeByState = TRUE,
showAllFactorLevels = TRUE
)
And since the reordering makes for easier plot interpretation, the other summaries are also re-run:
# Create cumulative for NY, with clusters reordered by burden
stateCountySummary(states=c("NY"),
df=changeOrderLabel(fullStateData, grpVars="fipsCounty"),
showQuadrants=TRUE,
showCumulative=TRUE
)
# Create cumulative for FL-GA, reordered and faceted by state
stateCountySummary(states=c("FL", "GA"),
df=changeOrderLabel(fullStateData, grpVars="fipsCounty"),
showQuadrants=TRUE,
showCumulative=TRUE,
facetCumulativeByState=TRUE
)
# Create cumulative for LA-MS-AL, reordered and faceted by state
stateCountySummary(states=c("LA", "MS", "AL"),
df=changeOrderLabel(fullStateData, grpVars="fipsCounty"),
showQuadrants=TRUE,
showCumulative=TRUE,
facetCumulativeByState = TRUE
)
# Run for East North Central region, reordered and faceted by state
stateCountySummary(states=usmap::.east_north_central,
df=changeOrderLabel(fullStateData, grpVars="fipsCounty"),
showQuadrants=TRUE,
showCumulative=TRUE,
facetCumulativeByState = TRUE
)
And, the main cluster summary plots are reproduced also, with the factors reordered:
# Get the desired cluster order (the dummy level 999 is excluded)
clustOrder <- changeOrderLabel(fullStateData, grpVars="fipsCounty") %>%
filter(cluster!=999) %>%
pull(cluster) %>%
fct_drop() %>%
levels()
# Test the function on the same data as before, with the plots each on their own page
# (the assigned result is intentionally unused; the call is run for its
# plotting side effects, with the cluster factor re-leveled by clustOrder)
dummy_Data_NotUse <- helperAssessCountyClusters(factor(countyCluster_km_test$objCluster$cluster,
levels=clustOrder
),
dfPop=countyFiltered,
dfBurden=countyFiltered,
thruLabel="Sep 3, 2020",
plotsTogether=TRUE,
showMap=FALSE
)
##
## Recency is defined as 2020-08-02 through current
##
## Recency is defined as 2020-08-02 through current
This workaround requires feeding the output of one function back in as an input to another. Next steps are to update assessClusters() and ancillary functions to include arguments for auto-creation of proper cluster levels as part of the routine.
The assessClusters() function has been updated to allow for ordering of clusters by deaths per million. Updated summaries are produced with the relevant argument:
# Re-run the cluster assessment with orderCluster=TRUE so the routine itself
# orders the clusters (no manual re-leveling workaround needed)
helperACC_test_05 <- helperAssessCountyClusters(countyCluster_km_test$objCluster$cluster,
dfPop=countyFiltered,
dfBurden=countyFiltered,
thruLabel="Sep 3, 2020",
plotsTogether=TRUE,
showMap=TRUE,
clusterPlotsTogether=TRUE,
orderCluster=TRUE
)
##
## Recency is defined as 2020-08-02 through current
##
## Recency is defined as 2020-08-02 through current
The clusters are now in a consistent order from highest to lowest aggregate disease burden.
Further, a function is written to create the integrated county data for plotting in the stateCountySummary() function:
# Create the integrated cluster-state data for use in stateCountySummary()
helperMakeClusterStateData <- function(dfPlot,
                                       dfPop=usmap::countypop,
                                       dfBurden=countyDailyPerCapita,
                                       orderCluster=FALSE
                                       ) {
  # FUNCTION ARGUMENTS:
  # dfPlot: the raw plotting data (which can have factors already reordered)
  # dfPop: source for full county data with population
  # dfBurden: source for disease burden by geography
  # orderCluster: boolean, whether to order the cluster factor order by burden
  #
  # Merge in the counties that were previously excluded from segmentation;
  # counties absent from dfPlot get the dummy cluster 999.
  # The join keys are made explicit (previously implicit, which is fragile if
  # a new common column appears); the rendered output confirmed both joins
  # were by "fipsCounty".
  # NOTE(review): in dfPlot the 'state' column appears to hold county FIPS
  # codes (it is zero-padded to width 5) -- confirm upstream
  df <- dfPlot %>%
    filter(date==max(date)) %>%
    select(fipsCounty=state, cluster) %>%
    mutate(fipsCounty=stringr::str_pad(fipsCounty, width=5, side="left", pad="0")) %>%
    right_join(select(dfPop, fipsCounty=fips, state=abbr, countyName=county, pop=pop_2015),
               by="fipsCounty"
               ) %>%
    mutate(cluster=factor(ifelse(is.na(cluster), 999, as.character(cluster))))
  # Merge in the disease data by county FIPS
  df <- dfBurden %>%
    mutate(fipsCounty=stringr::str_pad(state, width=5, side="left", pad="0")) %>%
    select(-state, -population) %>%
    right_join(df, by="fipsCounty")
  # Order the cluster factor by disease burden if requested
  if (orderCluster) df <- changeOrderLabel(df, grpVars="fipsCounty")
  # Return the relevant frame
  df
}
# Build the integrated county-cluster frame from the ordered assessment output
clusterStateData <- helperMakeClusterStateData(helperACC_test_05, orderCluster=TRUE)
## Joining, by = "fipsCounty"
## Joining, by = "fipsCounty"
# Run for East North Central region using the integrated frame
stateCountySummary(states=usmap::.east_north_central,
df=clusterStateData,
showQuadrants=TRUE,
showCumulative=TRUE,
facetCumulativeByState = TRUE
)
Next steps are to download the latest data and apply to the existing segments. New data are downloaded, and a function written to convert the downloaded data in to a file for analysis:
# Helper to read a raw USA Facts download and convert it for analysis
helperReadConvert <- function(file,
                              valueName
                              ) {
  # FUNCTION ARGUMENTS:
  # file: the file for reading and converting
  # valueName: name for the values column of the pivoted data
  #
  # Read the raw wide-format file and echo its structure
  rawData <- readr::read_csv(file)
  glimpse(rawData)
  # Pivot the per-date columns to long format and parse the dates
  longData <- rawData %>%
    rename(countyName=`County Name`, state=State) %>%
    pivot_longer(-c(countyFIPS, countyName, state, stateFIPS),
                 names_to="date",
                 values_to=valueName
                 ) %>%
    mutate(date=lubridate::mdy(date))
  glimpse(longData)
  # Convert the pivoted data and return the converted file
  countyLevelEvolution(longData,
                       burdenVar=valueName,
                       inclStates=NULL,
                       topN=5,
                       printPlot=FALSE,
                       returnData=TRUE
                       )
}
# Function to read and convert raw data from USA Facts
readUSAFacts <- function(caseFile,
deathFile,
stateClusters=NULL
) {
# FUNCTION ARGUMENTS:
# caseFile: the location of the downloaded cases dataset
# deathFile: the location of the downloaded deaths dataset
# stateClusters: a field 'cluster' will be created from this named vector (if NULL, 'cluster' will be NA)
# Read cases file
cnvCases <- helperReadConvert(caseFile, valueName="cumCases")
# Read deaths file
cnvDeaths <- helperReadConvert(deathFile, valueName="cumDeaths")
# Join the files so there is countyFIPS-county-population-date and cumCases cumDeaths
# Also, add the state segments as 'cluster' if requested
# NOTE(review): the dropped columns (countyName.x, countyName.y, bold) and the
# renamed 'burden' column come from countyLevelEvolution() output -- confirm
# against that function's definition if its output schema changes
dfBurden <- cnvDeaths %>%
select(-countyName.x, -countyName.y, -bold, cumDeathPer=burden) %>%
inner_join(cnvCases %>%
select(-countyName.x, -countyName.y, -bold, cumCasesPer=burden),
by=c("countyFIPS", "stateFIPS", "county", "state", "date", "population")
) %>%
mutate(cluster=if(is.null(stateClusters)) NA else stateClusters[state])
# Return the burdens file
dfBurden
}
# Get the clusters used previously in burdenUS (one row per state-cluster pair)
oldClusters <- burdenUS %>%
count(state, cluster) %>%
select(-n)
# Build a named vector mapping state abbreviation -> cluster, as expected by
# the stateClusters argument of readUSAFacts()
oldcvec <- pull(oldClusters, cluster)
names(oldcvec) <- oldClusters$state
# File read in from previous data (3 September 2020 download)
burden_20200903 <- readUSAFacts(
caseFile="./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20200903.csv",
deathFile="./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20200903.csv",
stateClusters=oldcvec
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## `County Name` = col_character(),
## State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 228
## $ countyFIPS <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20` <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20` <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20` <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20` <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20` <dbl> 0, 1, 4, 0, 0, 0, 0, 0, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20` <dbl> 0, 4, 4, 0, 0, 1, 0, 1, 2, 10, 1, 1, 0, 0, 1, 1, 0, 1...
## $ `3/26/20` <dbl> 0, 6, 5, 0, 0, 2, 2, 1, 2, 13, 1, 4, 1, 0, 1, 1, 0, 1...
## $ `3/27/20` <dbl> 0, 6, 5, 0, 0, 5, 2, 1, 3, 15, 1, 7, 1, 0, 1, 3, 0, 1...
## $ `3/28/20` <dbl> 0, 6, 10, 0, 0, 5, 2, 1, 3, 17, 1, 7, 1, 0, 2, 4, 0, ...
## $ `3/29/20` <dbl> 0, 6, 15, 0, 0, 5, 2, 1, 3, 27, 2, 8, 1, 0, 2, 5, 0, ...
## $ `3/30/20` <dbl> 0, 7, 18, 0, 2, 5, 2, 1, 9, 36, 2, 10, 2, 0, 2, 5, 0,...
## $ `3/31/20` <dbl> 0, 7, 19, 0, 3, 5, 2, 1, 9, 36, 2, 11, 3, 0, 2, 5, 0,...
## $ `4/1/20` <dbl> 0, 10, 23, 0, 3, 5, 2, 1, 11, 45, 2, 13, 4, 2, 3, 6, ...
## $ `4/2/20` <dbl> 0, 10, 25, 0, 4, 6, 2, 1, 12, 67, 4, 14, 4, 2, 7, 6, ...
## $ `4/3/20` <dbl> 0, 12, 28, 1, 4, 9, 2, 1, 20, 81, 5, 15, 4, 3, 8, 7, ...
## $ `4/4/20` <dbl> 0, 12, 29, 2, 4, 10, 2, 1, 21, 87, 6, 15, 4, 7, 9, 7,...
## $ `4/5/20` <dbl> 0, 12, 34, 2, 7, 10, 2, 1, 24, 90, 6, 18, 5, 9, 9, 7,...
## $ `4/6/20` <dbl> 0, 12, 38, 3, 7, 10, 2, 1, 38, 96, 6, 20, 6, 9, 9, 9,...
## $ `4/7/20` <dbl> 0, 12, 42, 3, 8, 10, 2, 2, 48, 102, 6, 20, 6, 10, 9, ...
## $ `4/8/20` <dbl> 0, 12, 49, 3, 9, 10, 3, 3, 52, 140, 7, 22, 6, 10, 11,...
## $ `4/9/20` <dbl> 0, 17, 59, 7, 11, 11, 4, 3, 54, 161, 7, 25, 6, 13, 11...
## $ `4/10/20` <dbl> 0, 17, 59, 9, 11, 12, 4, 3, 54, 171, 7, 27, 7, 13, 11...
## $ `4/11/20` <dbl> 0, 19, 66, 10, 13, 12, 4, 6, 57, 184, 7, 30, 9, 15, 1...
## $ `4/12/20` <dbl> 0, 19, 71, 10, 16, 13, 4, 7, 60, 200, 9, 30, 10, 19, ...
## $ `4/13/20` <dbl> 0, 19, 78, 10, 17, 15, 6, 8, 61, 212, 9, 33, 10, 19, ...
## $ `4/14/20` <dbl> 0, 23, 87, 11, 17, 16, 8, 8, 62, 216, 9, 33, 12, 21, ...
## $ `4/15/20` <dbl> 0, 25, 98, 13, 19, 17, 8, 11, 62, 227, 10, 37, 13, 22...
## $ `4/16/20` <dbl> 0, 25, 102, 14, 23, 18, 8, 11, 63, 234, 11, 37, 13, 2...
## $ `4/17/20` <dbl> 0, 25, 103, 15, 23, 20, 8, 13, 63, 236, 12, 37, 13, 2...
## $ `4/18/20` <dbl> 0, 25, 109, 18, 26, 20, 9, 13, 66, 240, 12, 39, 14, 2...
## $ `4/19/20` <dbl> 0, 27, 114, 20, 28, 21, 9, 14, 72, 246, 12, 42, 14, 2...
## $ `4/20/20` <dbl> 0, 28, 117, 22, 32, 22, 11, 14, 80, 257, 12, 43, 17, ...
## $ `4/21/20` <dbl> 0, 30, 123, 28, 32, 26, 11, 15, 83, 259, 12, 44, 18, ...
## $ `4/22/20` <dbl> 0, 32, 132, 29, 33, 29, 11, 17, 85, 270, 12, 46, 21, ...
## $ `4/23/20` <dbl> 0, 33, 143, 30, 33, 31, 12, 19, 88, 275, 12, 47, 22, ...
## $ `4/24/20` <dbl> 0, 36, 147, 32, 34, 31, 12, 21, 89, 282, 12, 49, 25, ...
## $ `4/25/20` <dbl> 0, 37, 154, 33, 35, 31, 12, 28, 90, 284, 12, 49, 27, ...
## $ `4/26/20` <dbl> 0, 37, 161, 33, 38, 34, 12, 32, 90, 285, 14, 51, 32, ...
## $ `4/27/20` <dbl> 0, 39, 168, 35, 42, 34, 12, 34, 90, 289, 14, 51, 39, ...
## $ `4/28/20` <dbl> 0, 40, 171, 37, 42, 34, 12, 45, 92, 290, 15, 52, 39, ...
## $ `4/29/20` <dbl> 0, 42, 173, 37, 42, 36, 12, 51, 93, 290, 15, 52, 39, ...
## $ `4/30/20` <dbl> 0, 42, 174, 39, 42, 37, 13, 53, 93, 290, 15, 52, 43, ...
## $ `5/1/20` <dbl> 0, 42, 175, 42, 42, 39, 14, 65, 93, 290, 15, 52, 49, ...
## $ `5/2/20` <dbl> 0, 45, 181, 43, 42, 40, 14, 92, 98, 294, 15, 54, 49, ...
## $ `5/3/20` <dbl> 0, 48, 187, 45, 42, 40, 14, 105, 105, 300, 16, 57, 49...
## $ `5/4/20` <dbl> 0, 53, 188, 45, 42, 40, 16, 114, 105, 302, 16, 58, 51...
## $ `5/5/20` <dbl> 0, 53, 189, 47, 43, 40, 18, 120, 114, 304, 17, 60, 54...
## $ `5/6/20` <dbl> 0, 58, 196, 47, 43, 42, 18, 130, 114, 306, 18, 61, 54...
## $ `5/7/20` <dbl> 0, 61, 205, 51, 44, 44, 18, 155, 120, 308, 18, 63, 56...
## $ `5/8/20` <dbl> 0, 67, 208, 53, 44, 44, 21, 162, 123, 311, 21, 63, 59...
## $ `5/9/20` <dbl> 0, 68, 216, 58, 45, 44, 22, 178, 124, 314, 22, 64, 61...
## $ `5/10/20` <dbl> 0, 74, 222, 59, 46, 44, 23, 189, 124, 316, 22, 65, 66...
## $ `5/11/20` <dbl> 0, 84, 224, 61, 46, 45, 26, 196, 125, 319, 24, 67, 67...
## $ `5/12/20` <dbl> 0, 91, 227, 67, 46, 45, 26, 224, 126, 324, 24, 69, 69...
## $ `5/13/20` <dbl> 0, 93, 231, 69, 46, 45, 28, 230, 127, 324, 24, 73, 72...
## $ `5/14/20` <dbl> 0, 103, 243, 74, 46, 45, 28, 249, 128, 326, 25, 74, 7...
## $ `5/15/20` <dbl> 0, 103, 244, 79, 49, 45, 32, 258, 129, 326, 26, 75, 8...
## $ `5/16/20` <dbl> 0, 110, 254, 79, 50, 45, 35, 271, 130, 328, 27, 77, 8...
## $ `5/17/20` <dbl> 0, 110, 254, 81, 50, 46, 35, 272, 130, 328, 27, 77, 8...
## $ `5/18/20` <dbl> 0, 120, 260, 85, 50, 47, 40, 285, 133, 329, 28, 79, 8...
## $ `5/19/20` <dbl> 0, 127, 262, 90, 51, 47, 52, 295, 133, 329, 29, 80, 9...
## $ `5/20/20` <dbl> 0, 136, 270, 96, 52, 47, 64, 312, 136, 330, 30, 83, 1...
## $ `5/21/20` <dbl> 0, 147, 270, 100, 52, 48, 71, 321, 136, 330, 31, 84, ...
## $ `5/22/20` <dbl> 0, 149, 271, 104, 55, 49, 89, 329, 137, 330, 33, 85, ...
## $ `5/23/20` <dbl> 0, 155, 273, 105, 58, 49, 105, 335, 138, 330, 33, 86,...
## $ `5/24/20` <dbl> 0, 159, 274, 110, 59, 49, 111, 344, 141, 336, 33, 87,...
## $ `5/25/20` <dbl> 0, 173, 276, 116, 62, 49, 141, 368, 147, 337, 33, 87,...
## $ `5/26/20` <dbl> 0, 189, 277, 122, 66, 51, 167, 380, 150, 338, 33, 90,...
## $ `5/27/20` <dbl> 0, 192, 281, 130, 71, 53, 176, 391, 152, 340, 33, 93,...
## $ `5/28/20` <dbl> 0, 205, 281, 132, 71, 58, 185, 392, 152, 349, 34, 97,...
## $ `5/29/20` <dbl> 0, 212, 282, 147, 71, 60, 201, 396, 153, 352, 36, 99,...
## $ `5/30/20` <dbl> 0, 216, 283, 150, 72, 61, 203, 402, 154, 353, 37, 100...
## $ `5/31/20` <dbl> 0, 220, 288, 164, 75, 62, 209, 410, 157, 355, 37, 100...
## $ `6/1/20` <dbl> 0, 233, 292, 172, 76, 63, 209, 414, 164, 358, 38, 103...
## $ `6/2/20` <dbl> 0, 238, 292, 175, 76, 63, 212, 416, 165, 358, 38, 104...
## $ `6/3/20` <dbl> 0, 239, 292, 177, 76, 63, 215, 419, 165, 359, 38, 105...
## $ `6/4/20` <dbl> 0, 241, 293, 177, 76, 63, 217, 421, 167, 360, 38, 107...
## $ `6/5/20` <dbl> 0, 248, 296, 183, 76, 64, 219, 431, 169, 363, 38, 108...
## $ `6/6/20` <dbl> 0, 259, 304, 190, 77, 70, 225, 442, 174, 373, 40, 108...
## $ `6/7/20` <dbl> 0, 265, 313, 193, 77, 72, 232, 449, 176, 378, 42, 110...
## $ `6/8/20` <dbl> 0, 272, 320, 197, 79, 73, 238, 455, 178, 383, 42, 111...
## $ `6/9/20` <dbl> 0, 282, 325, 199, 85, 75, 243, 464, 180, 391, 42, 117...
## $ `6/10/20` <dbl> 0, 295, 331, 208, 89, 79, 248, 471, 182, 401, 42, 118...
## $ `6/11/20` <dbl> 0, 312, 343, 214, 93, 87, 253, 484, 184, 417, 42, 121...
## $ `6/12/20` <dbl> 0, 323, 353, 221, 97, 95, 258, 499, 188, 427, 46, 122...
## $ `6/13/20` <dbl> 0, 331, 361, 226, 100, 102, 276, 517, 190, 438, 47, 1...
## $ `6/14/20` <dbl> 0, 357, 364, 234, 104, 110, 302, 536, 195, 453, 51, 1...
## $ `6/15/20` <dbl> 0, 368, 383, 238, 111, 116, 307, 544, 204, 475, 53, 1...
## $ `6/16/20` <dbl> 0, 373, 389, 245, 116, 121, 310, 551, 206, 485, 53, 1...
## $ `6/17/20` <dbl> 0, 375, 392, 251, 118, 123, 313, 554, 208, 486, 53, 1...
## $ `6/18/20` <dbl> 0, 400, 401, 263, 121, 130, 320, 566, 210, 501, 55, 1...
## $ `6/19/20` <dbl> 0, 411, 413, 266, 126, 139, 320, 569, 210, 507, 58, 1...
## $ `6/20/20` <dbl> 0, 431, 420, 272, 126, 143, 327, 572, 211, 516, 58, 1...
## $ `6/21/20` <dbl> 0, 434, 430, 272, 127, 149, 327, 576, 213, 521, 58, 1...
## $ `6/22/20` <dbl> 0, 442, 437, 277, 129, 153, 328, 578, 215, 528, 58, 1...
## $ `6/23/20` <dbl> 0, 453, 450, 280, 135, 159, 329, 581, 216, 534, 58, 1...
## $ `6/24/20` <dbl> 0, 469, 464, 288, 141, 168, 336, 584, 220, 543, 58, 1...
## $ `6/25/20` <dbl> 0, 479, 477, 305, 149, 176, 351, 588, 233, 549, 64, 1...
## $ `6/26/20` <dbl> 0, 488, 515, 312, 153, 184, 351, 594, 236, 559, 68, 1...
## $ `6/27/20` <dbl> 0, 498, 555, 317, 161, 188, 358, 600, 245, 561, 69, 2...
## $ `6/28/20` <dbl> 0, 503, 575, 317, 162, 189, 358, 602, 245, 561, 70, 2...
## $ `6/29/20` <dbl> 0, 527, 643, 322, 165, 199, 365, 605, 269, 585, 73, 2...
## $ `6/30/20` <dbl> 0, 537, 680, 325, 170, 208, 365, 607, 276, 590, 74, 2...
## $ `7/1/20` <dbl> 0, 553, 703, 326, 174, 218, 367, 607, 278, 595, 77, 2...
## $ `7/2/20` <dbl> 0, 561, 751, 335, 179, 222, 369, 610, 288, 611, 82, 2...
## $ `7/3/20` <dbl> 0, 568, 845, 348, 189, 230, 372, 625, 330, 625, 88, 2...
## $ `7/4/20` <dbl> 0, 591, 863, 350, 190, 234, 373, 626, 340, 637, 88, 2...
## $ `7/5/20` <dbl> 0, 615, 881, 352, 193, 239, 373, 634, 362, 642, 100, ...
## $ `7/6/20` <dbl> 0, 618, 911, 356, 197, 247, 373, 634, 384, 655, 105, ...
## $ `7/7/20` <dbl> 0, 644, 997, 360, 199, 255, 373, 634, 395, 656, 106, ...
## $ `7/8/20` <dbl> 0, 651, 1056, 366, 201, 262, 374, 639, 411, 660, 114,...
## $ `7/9/20` <dbl> 0, 661, 1131, 371, 211, 282, 375, 646, 445, 672, 115,...
## $ `7/10/20` <dbl> 0, 670, 1187, 381, 218, 292, 381, 648, 465, 679, 118,...
## $ `7/11/20` <dbl> 0, 684, 1224, 398, 224, 307, 382, 654, 500, 690, 128,...
## $ `7/12/20` <dbl> 0, 706, 1294, 403, 228, 331, 383, 655, 526, 693, 129,...
## $ `7/13/20` <dbl> 0, 728, 1359, 413, 231, 350, 383, 660, 566, 702, 136,...
## $ `7/14/20` <dbl> 0, 746, 1414, 428, 236, 366, 385, 661, 589, 712, 140,...
## $ `7/15/20` <dbl> 0, 756, 1518, 441, 242, 389, 386, 664, 655, 718, 145,...
## $ `7/16/20` <dbl> 0, 780, 1599, 459, 247, 424, 389, 669, 675, 731, 152,...
## $ `7/17/20` <dbl> 0, 789, 1689, 463, 255, 440, 393, 672, 720, 742, 157,...
## $ `7/18/20` <dbl> 0, 827, 1819, 483, 264, 458, 397, 678, 741, 756, 165,...
## $ `7/19/20` <dbl> 0, 842, 1937, 495, 269, 482, 398, 686, 785, 762, 173,...
## $ `7/20/20` <dbl> 0, 857, 2013, 503, 279, 507, 400, 689, 832, 767, 179,...
## $ `7/21/20` <dbl> 0, 865, 2102, 514, 283, 524, 401, 695, 869, 774, 182,...
## $ `7/22/20` <dbl> 0, 886, 2196, 518, 287, 547, 407, 701, 891, 782, 184,...
## $ `7/23/20` <dbl> 0, 905, 2461, 534, 289, 585, 408, 706, 934, 789, 193,...
## $ `7/24/20` <dbl> 0, 921, 2513, 539, 303, 615, 411, 711, 999, 797, 205,...
## $ `7/25/20` <dbl> 0, 932, 2662, 552, 318, 637, 414, 720, 1062, 810, 207...
## $ `7/26/20` <dbl> 0, 942, 2708, 562, 324, 646, 415, 724, 1113, 821, 209...
## $ `7/27/20` <dbl> 0, 965, 2770, 569, 334, 669, 416, 730, 1194, 825, 220...
## $ `7/28/20` <dbl> 0, 974, 2835, 575, 337, 675, 429, 734, 1243, 836, 221...
## $ `7/29/20` <dbl> 0, 974, 2835, 575, 338, 675, 429, 734, 1244, 836, 221...
## $ `7/30/20` <dbl> 0, 1002, 3028, 585, 352, 731, 435, 747, 1336, 848, 23...
## $ `7/31/20` <dbl> 0, 1015, 3101, 598, 363, 767, 437, 753, 1450, 859, 23...
## $ `8/1/20` <dbl> 0, 1030, 3142, 602, 368, 792, 443, 757, 1480, 861, 24...
## $ `8/2/20` <dbl> 0, 1052, 3223, 610, 372, 813, 445, 765, 1580, 868, 25...
## $ `8/3/20` <dbl> 0, 1066, 3265, 612, 382, 830, 446, 766, 1612, 875, 26...
## $ `8/4/20` <dbl> 0, 1073, 3320, 614, 389, 836, 449, 766, 1646, 882, 26...
## $ `8/5/20` <dbl> 0, 1073, 3380, 615, 392, 839, 452, 769, 1683, 886, 27...
## $ `8/6/20` <dbl> 0, 1096, 3438, 619, 421, 874, 458, 771, 1741, 893, 28...
## $ `8/7/20` <dbl> 0, 1113, 3504, 624, 424, 909, 462, 774, 1777, 899, 29...
## $ `8/8/20` <dbl> 0, 1134, 3564, 628, 434, 923, 471, 773, 1836, 904, 29...
## $ `8/9/20` <dbl> 0, 1215, 3606, 630, 446, 934, 472, 779, 1860, 906, 30...
## $ `8/10/20` <dbl> 0, 1215, 3714, 631, 450, 947, 474, 782, 1883, 909, 30...
## $ `8/11/20` <dbl> 0, 1215, 3736, 643, 455, 958, 489, 785, 1914, 916, 30...
## $ `8/12/20` <dbl> 0, 1241, 3776, 646, 464, 967, 500, 788, 1935, 918, 31...
## $ `8/13/20` <dbl> 0, 1250, 3813, 651, 469, 977, 501, 790, 1959, 919, 32...
## $ `8/14/20` <dbl> 0, 1252, 3860, 656, 477, 989, 502, 796, 1975, 922, 32...
## $ `8/15/20` <dbl> 0, 1262, 3909, 663, 483, 996, 503, 807, 2019, 925, 33...
## $ `8/16/20` <dbl> 0, 1273, 3948, 671, 483, 1005, 504, 811, 2037, 927, 3...
## $ `8/17/20` <dbl> 0, 1274, 3960, 672, 488, 1008, 504, 814, 2055, 928, 3...
## $ `8/18/20` <dbl> 0, 1291, 3977, 674, 490, 1034, 512, 814, 2107, 937, 3...
## $ `8/19/20` <dbl> 0, 1293, 4002, 683, 503, 1049, 530, 814, 2159, 941, 3...
## $ `8/20/20` <dbl> 0, 1293, 4035, 690, 507, 1077, 534, 814, 2214, 949, 3...
## $ `8/21/20` <dbl> 0, 1293, 4054, 690, 509, 1083, 534, 814, 2228, 952, 3...
## $ `8/22/20` <dbl> 0, 1322, 4115, 699, 516, 1096, 536, 822, 2276, 957, 3...
## $ `8/23/20` <dbl> 0, 1324, 4147, 702, 523, 1099, 536, 824, 2286, 958, 3...
## $ `8/24/20` <dbl> 0, 1351, 4167, 720, 526, 1135, 536, 825, 2327, 971, 3...
## $ `8/25/20` <dbl> 0, 1355, 4190, 724, 527, 1160, 536, 826, 2345, 973, 3...
## $ `8/26/20` <dbl> 0, 1366, 4265, 732, 530, 1195, 537, 833, 2400, 983, 3...
## $ `8/27/20` <dbl> 0, 1377, 4311, 739, 533, 1213, 538, 839, 2413, 1011, ...
## $ `8/28/20` <dbl> 0, 1389, 4347, 745, 535, 1219, 541, 840, 2443, 1017, ...
## $ `8/29/20` <dbl> 0, 1400, 4424, 753, 540, 1248, 546, 855, 2499, 1024, ...
## $ `8/30/20` <dbl> 0, 1438, 4525, 757, 550, 1277, 550, 864, 2533, 1027, ...
## $ `8/31/20` <dbl> 0, 1442, 4545, 757, 554, 1287, 551, 866, 2567, 1033, ...
## $ `9/1/20` <dbl> 0, 1453, 4568, 764, 558, 1303, 559, 871, 2619, 1041, ...
## Observations: 715,680
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumCases <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## Parsed with column specification:
## cols(
## .default = col_double(),
## `County Name` = col_character(),
## State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 228
## $ countyFIPS <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/26/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/27/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/28/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/29/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/30/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/31/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/1/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/2/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/3/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/4/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/5/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/6/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/7/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/8/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/9/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/10/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/11/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/12/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/13/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/14/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/15/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/16/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/17/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/18/20` <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/19/20` <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/20/20` <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/21/20` <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 13, 0, 0, 0, 1, 0, 0, 0, 1...
## $ `4/22/20` <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 16, 0, 0, 0, 1, 0, 1, 0, 1...
## $ `4/23/20` <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 16, 0, 1, 0, 1, 1, 1, 0, 1...
## $ `4/24/20` <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 17, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/25/20` <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/26/20` <dbl> 0, 2, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/27/20` <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/28/20` <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 19, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/29/20` <dbl> 0, 3, 2, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/30/20` <dbl> 0, 3, 3, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/1/20` <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/2/20` <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/3/20` <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/4/20` <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/5/20` <dbl> 0, 3, 5, 1, 0, 0, 0, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/6/20` <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/7/20` <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/8/20` <dbl> 0, 3, 5, 1, 1, 0, 1, 3, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/9/20` <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/10/20` <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/11/20` <dbl> 0, 3, 6, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/12/20` <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 21, 0, 1, 1, 1, 1, 1, 1, 2...
## $ `5/13/20` <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 22, 0, 1, 2, 1, 1, 1, 1, 2...
## $ `5/14/20` <dbl> 0, 3, 8, 1, 1, 0, 1, 8, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/15/20` <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/16/20` <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/17/20` <dbl> 0, 3, 8, 1, 1, 1, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/18/20` <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/19/20` <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/20/20` <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/21/20` <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/22/20` <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/23/20` <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/24/20` <dbl> 0, 3, 9, 1, 1, 1, 1, 12, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/25/20` <dbl> 0, 3, 9, 1, 1, 1, 3, 12, 3, 24, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/26/20` <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/27/20` <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/28/20` <dbl> 0, 3, 9, 1, 1, 1, 4, 15, 3, 24, 2, 1, 8, 2, 2, 1, 1, ...
## $ `5/29/20` <dbl> 0, 3, 9, 1, 1, 1, 4, 16, 3, 24, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/30/20` <dbl> 0, 4, 9, 1, 1, 1, 4, 17, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/31/20` <dbl> 0, 4, 9, 1, 1, 1, 5, 18, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `6/1/20` <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 25, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/2/20` <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/3/20` <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/4/20` <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/5/20` <dbl> 0, 5, 9, 1, 1, 1, 7, 21, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/6/20` <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/7/20` <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/8/20` <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/9/20` <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/10/20` <dbl> 0, 6, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/11/20` <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/12/20` <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/13/20` <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/14/20` <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/15/20` <dbl> 0, 6, 9, 1, 1, 1, 9, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/16/20` <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/17/20` <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/18/20` <dbl> 0, 8, 9, 1, 1, 1, 9, 25, 4, 26, 5, 3, 11, 4, 2, 1, 1,...
## $ `6/19/20` <dbl> 0, 8, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 11, 4, 2, 1, 1...
## $ `6/20/20` <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/21/20` <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/22/20` <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/23/20` <dbl> 0, 9, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 4, 2, 1, 1...
## $ `6/24/20` <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/25/20` <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/26/20` <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/27/20` <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/28/20` <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/29/20` <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/30/20` <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/1/20` <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/2/20` <dbl> 0, 13, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/3/20` <dbl> 0, 13, 10, 2, 1, 1, 10, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/4/20` <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/5/20` <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/6/20` <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/7/20` <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/8/20` <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/9/20` <dbl> 0, 14, 11, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/10/20` <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/11/20` <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/12/20` <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/13/20` <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/14/20` <dbl> 0, 18, 12, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/15/20` <dbl> 0, 19, 13, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/16/20` <dbl> 0, 20, 14, 3, 2, 1, 11, 32, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/17/20` <dbl> 0, 21, 14, 3, 2, 1, 11, 33, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/18/20` <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/19/20` <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/20/20` <dbl> 0, 21, 15, 4, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/21/20` <dbl> 0, 21, 16, 4, 2, 1, 11, 34, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/22/20` <dbl> 0, 21, 16, 4, 2, 1, 12, 34, 6, 34, 7, 5, 12, 8, 2, 1,...
## $ `7/23/20` <dbl> 0, 21, 17, 4, 2, 1, 12, 35, 6, 34, 7, 5, 12, 9, 2, 1,...
## $ `7/24/20` <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/25/20` <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/26/20` <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/27/20` <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/28/20` <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/29/20` <dbl> 0, 21, 21, 4, 2, 3, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/30/20` <dbl> 0, 21, 21, 5, 2, 3, 12, 36, 8, 38, 8, 6, 12, 9, 5, 1,...
## $ `7/31/20` <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/1/20` <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/2/20` <dbl> 0, 21, 23, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/3/20` <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/4/20` <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 12, 38, 8, 7, 12, 9, 5, 1...
## $ `8/5/20` <dbl> 0, 22, 24, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/6/20` <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/7/20` <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/8/20` <dbl> 0, 22, 26, 5, 5, 4, 12, 37, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/9/20` <dbl> 0, 22, 27, 5, 5, 4, 12, 37, 14, 38, 8, 8, 12, 9, 5, 1...
## $ `8/10/20` <dbl> 0, 22, 28, 5, 5, 4, 12, 37, 17, 38, 9, 9, 12, 10, 5, ...
## $ `8/11/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 19, 38, 9, 12, 12, 10, 5,...
## $ `8/12/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/13/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/14/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/15/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/16/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/17/20` <dbl> 0, 23, 32, 6, 6, 5, 14, 37, 23, 38, 9, 12, 12, 10, 5,...
## $ `8/18/20` <dbl> 0, 23, 33, 6, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/19/20` <dbl> 0, 23, 33, 7, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/20/20` <dbl> 0, 23, 34, 7, 6, 5, 14, 37, 25, 38, 10, 12, 12, 10, 5...
## $ `8/21/20` <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 5...
## $ `8/22/20` <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/23/20` <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/24/20` <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 27, 38, 10, 13, 12, 11, 6...
## $ `8/25/20` <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 28, 39, 10, 13, 12, 11, 6...
## $ `8/26/20` <dbl> 0, 23, 36, 7, 6, 7, 14, 37, 28, 39, 10, 13, 12, 12, 6...
## $ `8/27/20` <dbl> 0, 23, 37, 7, 6, 7, 14, 37, 30, 39, 12, 13, 12, 13, 6...
## $ `8/28/20` <dbl> 0, 23, 39, 7, 6, 9, 14, 37, 32, 40, 12, 13, 12, 13, 6...
## $ `8/29/20` <dbl> 0, 23, 40, 7, 7, 9, 14, 37, 35, 40, 12, 13, 12, 14, 6...
## $ `8/30/20` <dbl> 0, 23, 40, 7, 7, 10, 14, 37, 35, 40, 12, 13, 12, 14, ...
## $ `8/31/20` <dbl> 0, 23, 42, 7, 8, 11, 14, 37, 36, 40, 12, 13, 12, 14, ...
## $ `9/1/20` <dbl> 0, 24, 42, 7, 8, 11, 14, 37, 38, 40, 12, 13, 12, 14, ...
## Observations: 715,680
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumDeaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
# Comparison of components: list any counties present in burden_20200903 but
# absent from burdenUS (matched on countyFIPS); zero rows means full overlap.
count(
  anti_join(burden_20200903, select(burdenUS, countyFIPS)),
  state, county
)
## Joining, by = "countyFIPS"
## # A tibble: 0 x 3
## # ... with 3 variables: state <chr>, county <chr>, n <int>
table(rowSums((burden_20200903 %>% semi_join(select(burdenUS, countyFIPS)) != burdenUS)))
## Joining, by = "countyFIPS"
##
## 0
## 700448
The two files are identical, except that burden_20200903 additionally retains the county names that fail to match usmap::countypop. Next, a function is written to analyze the burden data:
# Function to plot the burden by cluster
#
# Draws two diagnostic plots from a county-level burden data frame; both are
# emitted via print() (side effect), nothing is returned for use by callers.
#   p1 -- per-county scatter of cumulative cases-per-million vs
#         deaths-per-million on maxDate, faceted by cluster, with a
#         population-weighted linear fit per cluster.
#   p2 -- one point per cluster, shown two ways: the median across its
#         counties, and the population-weighted cluster-level rate.
plotBurdenData <- function(df,
maxDate,
minPop
) {
# FUNCTION ARGUMENTS:
# df: the burden data
# maxDate: the maximum date
# minPop: the minimum population
# Create a date label
# Human-readable form of maxDate (e.g. "Sep 01, 2020"), reused in all
# axis labels, titles, and subtitles below
dateLabel <- format(as.Date(maxDate), "%b %d, %Y")
# Plot the data as of maxDate
p1 <- df %>%
filter(date==maxDate, population>=minPop) %>%
ggplot(aes(x=cumCasesPer, y=cumDeathPer)) +
# Point size encodes log10 population; color encodes cluster membership
geom_point(aes(size=log10(population), color=factor(cluster)), alpha=0.25) +
geom_smooth(method="lm", se=FALSE, aes(weight=population, color=factor(cluster))) +
scale_size_continuous("log10\nCounty\nPop") +
labs(x=paste0("Cases per million as of ", dateLabel),
y=paste0("Deaths per million as of ", dateLabel),
title=paste0("County-level per-capita coronavirus burden as of ", dateLabel),
subtitle=paste0("Filtered to counties with population of at least ", minPop)
) +
facet_wrap(~cluster)
print(p1)
# Further, the total burden by cluster is plotted
# NOTE(review): population>=0 keeps every county here, i.e. minPop is NOT
# applied to the cluster-level totals -- confirm this asymmetry with p1
# is intentional.
p2 <- df %>%
filter(date==maxDate, population>=0) %>%
group_by(cluster) %>%
summarize(population=sum(population),
cumCases=sum(cumCases),
cumDeath=sum(cumDeaths),
mdn_CasesPer=median(cumCasesPer),
mdn_DeathPer=median(cumDeathPer)
) %>%
# Population-weighted per-million rates for the cluster as a whole
mutate(mean_CasesPer=1000000*cumCases/population, mean_DeathPer=1000000*cumDeath/population) %>%
select(cluster, starts_with("mdn"), starts_with("mean")) %>%
pivot_longer(-cluster) %>%
# Split names like "mdn_CasesPer" into the aggregation type ("mdn"/"mean",
# text before "_") and the metric type ("CasesPer"/"DeathPer", text after)
mutate(aggType=stringr::str_replace(name, "_.*", ""),
metType=stringr::str_replace(name, ".*_", "")
) %>%
pivot_wider(c(cluster, aggType), names_from="metType", values_from="value") %>%
ggplot(aes(x=CasesPer, y=DeathPer)) +
geom_point(aes(color=factor(cluster)), size=5) +
labs(x=paste0("Cases per million as of ", dateLabel),
y=paste0("Deaths per million as of ", dateLabel),
title=paste0("Cluster-level per-capita coronavirus burden as of ", dateLabel),
subtitle="State-level clusters based on hierarchical (method=`complete`)"
) +
scale_color_discrete("Cluster") +
ylim(c(0, NA)) +
xlim(c(0, NA)) +
# Named-vector lookup maps the aggType codes to readable facet labels
facet_wrap(~c("mdn"="Median of all counties in segment", "mean"="Segment-level metric")[aggType])
print(p2)
}
plotBurdenData(burden_20200903, maxDate="2020-09-01", minPop=10000)
A second function is written to create the file used for further analysis:
# Create the filtered county data frame
#
# Converts cumulative county burden data to daily new per-million values,
# adds 7-day rolling aggregates, and filters by date and population.
# Prints several diagnostic summaries (duplicate check, completeness check,
# low-burden county fractions) as side effects, then returns the filtered
# data frame.
createCountyFiltered <- function(df,
maxDate,
minPop
) {
# FUNCTION ARGUMENTS:
# df: data frame with the burden data
# maxDate: the last date to use
# minPop: the minimum population to use
# STEP 1: Select only desired variables from df
# NOTE: countyFIPS is deliberately renamed to 'state' so downstream code
# originally written for state-level data can be reused unchanged
df <- df %>%
select(state=countyFIPS, date, cpm=cumCasesPer, dpm=cumDeathPer, population) %>%
arrange(state, date)
# STEP 1a: Confirm that there are no duplicates and that every county has the same dates
# This should be 1 provided that there are no duplicates
cat("\nThis should be 1, otherwise there are duplicates\n")
df %>%
count(state, date) %>%
pull(n) %>%
max() %>%
print()
cat("\n\n\n")
# This should have no standard deviation if the same number of records exist on every day
cat("\nMin and max should be the same if there are records everywhere on every day\n")
df %>%
mutate(n=1) %>%
group_by(date) %>%
summarize(n=sum(n), population=sum(population)) %>%
summarize_at(vars(all_of(c("n", "population"))), .funs=list(min=min, max=max)) %>%
print()
cat("\n\n\n")
# STEP 2: Convert to daily new totals rather than cumulative data
# First row of each county keeps its cumulative value as the daily value
# (the implicit prior value is zero); all later rows take the difference
# from the previous day
dfDaily <- df %>%
group_by(state) %>%
arrange(date) %>%
mutate_at(vars(all_of(c("cpm", "dpm"))), ~ifelse(row_number()==1, ., .-lag(.))) %>%
ungroup() %>%
# STEP 2a: Add rolling 7 aggregates and total cases/deaths
# helperRollingAgg is defined elsewhere in this file; presumably it adds a
# k-day rolling aggregate of origVar as a new column newName -- TODO confirm
dfDaily <- dfDaily %>%
arrange(state, date) %>%
group_by(state) %>%
helperRollingAgg(origVar="cpm", newName="cpm7", k=7) %>%
helperRollingAgg(origVar="dpm", newName="dpm7", k=7) %>%
ungroup() %>%
# Recover raw counts from the per-million rates
mutate(cases=cpm*population/1000000, deaths=dpm*population/1000000)
# STEP 3: Filter the data frame for date and minimum population
# Ensure that 'state' (which holds countyFIPS) is not summed as a double
dfFiltered <- dfDaily %>%
filter(population >= minPop, date <= as.Date(maxDate)) %>%
mutate(state=as.character(state))
# Check number of counties that will fail the test for 100 deaths per million or 5000 cases per million
# Small predicate helpers: fraction of values that are zero / below n
is0 <- function(x) mean(x==0)
isltn <- function(x, n) mean(x<n)
islt100 <- function(x) isltn(x, n=100)
islt5000 <- function(x) isltn(x, n=5000)
dfFiltered %>%
group_by(state) %>%
summarize_at(c("cpm", "dpm"), sum) %>%
ungroup() %>%
summarize_at(vars(all_of(c("cpm", "dpm"))),
.funs=list(mean_is0=is0, mean_lt100=islt100, mean_lt5000=islt5000)
) %>%
print()
# Return the data file
dfFiltered
}
countyFiltered_20200903 <- createCountyFiltered(burden_20200903, maxDate="2020-08-31", minPop=25000)
##
## This should be 1, otherwise there are duplicates
## [1] 1
##
##
##
##
## Min and max should be the same if there are records everywhere on every day
## # A tibble: 1 x 4
## n_min population_min n_max population_max
## <dbl> <dbl> <dbl> <dbl>
## 1 3127 326429233 3127 326429233
##
##
##
## # A tibble: 1 x 6
## cpm_mean_is0 dpm_mean_is0 cpm_mean_lt100 dpm_mean_lt100 cpm_mean_lt5000
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0.0352 0 0.266 0.140
## # ... with 1 more variable: dpm_mean_lt5000 <dbl>
all.equal(countyFiltered_20200903, countyFiltered)
## [1] TRUE
With the functions in place, the next step is to download new data and run it through the preparatory functions:
# File read in from previous data
# Load the 2020-09-17 USAFacts case and death downloads and combine them into
# a single burden data frame, carrying over the previously derived state
# cluster assignments (oldcvec). readUSAFacts is defined earlier in this file.
burden_20200917 <- readUSAFacts(
caseFile="./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20200917.csv",
deathFile="./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20200917.csv",
stateClusters=oldcvec
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## `County Name` = col_character(),
## State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 242
## $ countyFIPS <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20` <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20` <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20` <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20` <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20` <dbl> 0, 1, 4, 0, 0, 0, 0, 0, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20` <dbl> 0, 4, 4, 0, 0, 1, 0, 1, 2, 10, 1, 1, 0, 0, 1, 1, 0, 1...
## $ `3/26/20` <dbl> 0, 6, 5, 0, 0, 2, 2, 1, 2, 13, 1, 4, 1, 0, 1, 1, 0, 1...
## $ `3/27/20` <dbl> 0, 6, 5, 0, 0, 5, 2, 1, 3, 15, 1, 7, 1, 0, 1, 3, 0, 1...
## $ `3/28/20` <dbl> 0, 6, 10, 0, 0, 5, 2, 1, 3, 17, 1, 7, 1, 0, 2, 4, 0, ...
## $ `3/29/20` <dbl> 0, 6, 15, 0, 0, 5, 2, 1, 3, 27, 2, 8, 1, 0, 2, 5, 0, ...
## $ `3/30/20` <dbl> 0, 7, 18, 0, 2, 5, 2, 1, 9, 36, 2, 10, 2, 0, 2, 5, 0,...
## $ `3/31/20` <dbl> 0, 7, 19, 0, 3, 5, 2, 1, 9, 36, 2, 11, 3, 0, 2, 5, 0,...
## $ `4/1/20` <dbl> 0, 10, 23, 0, 3, 5, 2, 1, 11, 45, 2, 13, 4, 2, 3, 6, ...
## $ `4/2/20` <dbl> 0, 10, 25, 0, 4, 6, 2, 1, 12, 67, 4, 14, 4, 2, 7, 6, ...
## $ `4/3/20` <dbl> 0, 12, 28, 1, 4, 9, 2, 1, 20, 81, 5, 15, 4, 3, 8, 7, ...
## $ `4/4/20` <dbl> 0, 12, 29, 2, 4, 10, 2, 1, 21, 87, 6, 15, 4, 7, 9, 7,...
## $ `4/5/20` <dbl> 0, 12, 34, 2, 7, 10, 2, 1, 24, 90, 6, 18, 5, 9, 9, 7,...
## $ `4/6/20` <dbl> 0, 12, 38, 3, 7, 10, 2, 1, 38, 96, 6, 20, 6, 9, 9, 9,...
## $ `4/7/20` <dbl> 0, 12, 42, 3, 8, 10, 2, 2, 48, 102, 6, 20, 6, 10, 9, ...
## $ `4/8/20` <dbl> 0, 12, 49, 3, 9, 10, 3, 3, 52, 140, 7, 22, 6, 10, 11,...
## $ `4/9/20` <dbl> 0, 17, 59, 7, 11, 11, 4, 3, 54, 161, 7, 25, 6, 13, 11...
## $ `4/10/20` <dbl> 0, 17, 59, 9, 11, 12, 4, 3, 54, 171, 7, 27, 7, 13, 11...
## $ `4/11/20` <dbl> 0, 19, 66, 10, 13, 12, 4, 6, 57, 184, 7, 30, 9, 15, 1...
## $ `4/12/20` <dbl> 0, 19, 71, 10, 16, 13, 4, 7, 60, 200, 9, 30, 10, 19, ...
## $ `4/13/20` <dbl> 0, 19, 78, 10, 17, 15, 6, 8, 61, 212, 9, 33, 10, 19, ...
## $ `4/14/20` <dbl> 0, 23, 87, 11, 17, 16, 8, 8, 62, 216, 9, 33, 12, 21, ...
## $ `4/15/20` <dbl> 0, 25, 98, 13, 19, 17, 8, 11, 62, 227, 10, 37, 13, 22...
## $ `4/16/20` <dbl> 0, 25, 102, 14, 23, 18, 8, 11, 63, 234, 11, 37, 13, 2...
## $ `4/17/20` <dbl> 0, 25, 103, 15, 23, 20, 8, 13, 63, 236, 12, 37, 13, 2...
## $ `4/18/20` <dbl> 0, 25, 109, 18, 26, 20, 9, 13, 66, 240, 12, 39, 14, 2...
## $ `4/19/20` <dbl> 0, 27, 114, 20, 28, 21, 9, 14, 72, 246, 12, 42, 14, 2...
## $ `4/20/20` <dbl> 0, 28, 117, 22, 32, 22, 11, 14, 80, 257, 12, 43, 17, ...
## $ `4/21/20` <dbl> 0, 30, 123, 28, 32, 26, 11, 15, 83, 259, 12, 44, 18, ...
## $ `4/22/20` <dbl> 0, 32, 132, 29, 33, 29, 11, 17, 85, 270, 12, 46, 21, ...
## $ `4/23/20` <dbl> 0, 33, 143, 30, 33, 31, 12, 19, 88, 275, 12, 47, 22, ...
## $ `4/24/20` <dbl> 0, 36, 147, 32, 34, 31, 12, 21, 89, 282, 12, 49, 25, ...
## $ `4/25/20` <dbl> 0, 37, 154, 33, 35, 31, 12, 28, 90, 284, 12, 49, 27, ...
## $ `4/26/20` <dbl> 0, 37, 161, 33, 38, 34, 12, 32, 90, 285, 14, 51, 32, ...
## $ `4/27/20` <dbl> 0, 39, 168, 35, 42, 34, 12, 34, 90, 289, 14, 51, 39, ...
## $ `4/28/20` <dbl> 0, 40, 171, 37, 42, 34, 12, 45, 92, 290, 15, 52, 39, ...
## $ `4/29/20` <dbl> 0, 42, 173, 37, 42, 36, 12, 51, 93, 290, 15, 52, 39, ...
## $ `4/30/20` <dbl> 0, 42, 174, 39, 42, 37, 13, 53, 93, 290, 15, 52, 43, ...
## $ `5/1/20` <dbl> 0, 42, 175, 42, 42, 39, 14, 65, 93, 290, 15, 52, 49, ...
## $ `5/2/20` <dbl> 0, 45, 181, 43, 42, 40, 14, 92, 98, 294, 15, 54, 49, ...
## $ `5/3/20` <dbl> 0, 48, 187, 45, 42, 40, 14, 105, 105, 300, 16, 57, 49...
## $ `5/4/20` <dbl> 0, 53, 188, 45, 42, 40, 16, 114, 105, 302, 16, 58, 51...
## $ `5/5/20` <dbl> 0, 53, 189, 47, 43, 40, 18, 120, 114, 304, 17, 60, 54...
## $ `5/6/20` <dbl> 0, 58, 196, 47, 43, 42, 18, 130, 114, 306, 18, 61, 54...
## $ `5/7/20` <dbl> 0, 61, 205, 51, 44, 44, 18, 155, 120, 308, 18, 63, 56...
## $ `5/8/20` <dbl> 0, 67, 208, 53, 44, 44, 21, 162, 123, 311, 21, 63, 59...
## $ `5/9/20` <dbl> 0, 68, 216, 58, 45, 44, 22, 178, 124, 314, 22, 64, 61...
## $ `5/10/20` <dbl> 0, 74, 222, 59, 46, 44, 23, 189, 124, 316, 22, 65, 66...
## $ `5/11/20` <dbl> 0, 84, 224, 61, 46, 45, 26, 196, 125, 319, 24, 67, 67...
## $ `5/12/20` <dbl> 0, 91, 227, 67, 46, 45, 26, 224, 126, 324, 24, 69, 69...
## $ `5/13/20` <dbl> 0, 93, 231, 69, 46, 45, 28, 230, 127, 324, 24, 73, 72...
## $ `5/14/20` <dbl> 0, 103, 243, 74, 46, 45, 28, 249, 128, 326, 25, 74, 7...
## $ `5/15/20` <dbl> 0, 103, 244, 79, 49, 45, 32, 258, 129, 326, 26, 75, 8...
## $ `5/16/20` <dbl> 0, 110, 254, 79, 50, 45, 35, 271, 130, 328, 27, 77, 8...
## $ `5/17/20` <dbl> 0, 110, 254, 81, 50, 46, 35, 272, 130, 328, 27, 77, 8...
## $ `5/18/20` <dbl> 0, 120, 260, 85, 50, 47, 40, 285, 133, 329, 28, 79, 8...
## $ `5/19/20` <dbl> 0, 127, 262, 90, 51, 47, 52, 295, 133, 329, 29, 80, 9...
## $ `5/20/20` <dbl> 0, 136, 270, 96, 52, 47, 64, 312, 136, 330, 30, 83, 1...
## $ `5/21/20` <dbl> 0, 147, 270, 100, 52, 48, 71, 321, 136, 330, 31, 84, ...
## $ `5/22/20` <dbl> 0, 149, 271, 104, 55, 49, 89, 329, 137, 330, 33, 85, ...
## $ `5/23/20` <dbl> 0, 155, 273, 105, 58, 49, 105, 335, 138, 330, 33, 86,...
## $ `5/24/20` <dbl> 0, 159, 274, 110, 59, 49, 111, 344, 141, 336, 33, 87,...
## $ `5/25/20` <dbl> 0, 173, 276, 116, 62, 49, 141, 368, 147, 337, 33, 87,...
## $ `5/26/20` <dbl> 0, 189, 277, 122, 66, 51, 167, 380, 150, 338, 33, 90,...
## $ `5/27/20` <dbl> 0, 192, 281, 130, 71, 53, 176, 391, 152, 340, 33, 93,...
## $ `5/28/20` <dbl> 0, 205, 281, 132, 71, 58, 185, 392, 152, 349, 34, 97,...
## $ `5/29/20` <dbl> 0, 212, 282, 147, 71, 60, 201, 396, 153, 352, 36, 99,...
## $ `5/30/20` <dbl> 0, 216, 283, 150, 72, 61, 203, 402, 154, 353, 37, 100...
## $ `5/31/20` <dbl> 0, 220, 288, 164, 75, 62, 209, 410, 157, 355, 37, 100...
## $ `6/1/20` <dbl> 0, 233, 292, 172, 76, 63, 209, 414, 164, 358, 38, 103...
## $ `6/2/20` <dbl> 0, 238, 292, 175, 76, 63, 212, 416, 165, 358, 38, 104...
## $ `6/3/20` <dbl> 0, 239, 292, 177, 76, 63, 215, 419, 165, 359, 38, 105...
## $ `6/4/20` <dbl> 0, 241, 293, 177, 76, 63, 217, 421, 167, 360, 38, 107...
## $ `6/5/20` <dbl> 0, 248, 296, 183, 76, 64, 219, 431, 169, 363, 38, 108...
## $ `6/6/20` <dbl> 0, 259, 304, 190, 77, 70, 225, 442, 174, 373, 40, 108...
## $ `6/7/20` <dbl> 0, 265, 313, 193, 77, 72, 232, 449, 176, 378, 42, 110...
## $ `6/8/20` <dbl> 0, 272, 320, 197, 79, 73, 238, 455, 178, 383, 42, 111...
## $ `6/9/20` <dbl> 0, 282, 325, 199, 85, 75, 243, 464, 180, 391, 42, 117...
## $ `6/10/20` <dbl> 0, 295, 331, 208, 89, 79, 248, 471, 182, 401, 42, 118...
## $ `6/11/20` <dbl> 0, 312, 343, 214, 93, 87, 253, 484, 184, 417, 42, 121...
## $ `6/12/20` <dbl> 0, 323, 353, 221, 97, 95, 258, 499, 188, 427, 46, 122...
## $ `6/13/20` <dbl> 0, 331, 361, 226, 100, 102, 276, 517, 190, 438, 47, 1...
## $ `6/14/20` <dbl> 0, 357, 364, 234, 104, 110, 302, 536, 195, 453, 51, 1...
## $ `6/15/20` <dbl> 0, 368, 383, 238, 111, 116, 307, 544, 204, 475, 53, 1...
## $ `6/16/20` <dbl> 0, 373, 389, 245, 116, 121, 310, 551, 206, 485, 53, 1...
## $ `6/17/20` <dbl> 0, 375, 392, 251, 118, 123, 313, 554, 208, 486, 53, 1...
## $ `6/18/20` <dbl> 0, 400, 401, 263, 121, 130, 320, 566, 210, 501, 55, 1...
## $ `6/19/20` <dbl> 0, 411, 413, 266, 126, 139, 320, 569, 210, 507, 58, 1...
## $ `6/20/20` <dbl> 0, 431, 420, 272, 126, 143, 327, 572, 211, 516, 58, 1...
## $ `6/21/20` <dbl> 0, 434, 430, 272, 127, 149, 327, 576, 213, 521, 58, 1...
## $ `6/22/20` <dbl> 0, 442, 437, 277, 129, 153, 328, 578, 215, 528, 58, 1...
## $ `6/23/20` <dbl> 0, 453, 450, 280, 135, 159, 329, 581, 216, 534, 58, 1...
## $ `6/24/20` <dbl> 0, 469, 464, 288, 141, 168, 336, 584, 220, 543, 58, 1...
## $ `6/25/20` <dbl> 0, 479, 477, 305, 149, 176, 351, 588, 233, 549, 64, 1...
## $ `6/26/20` <dbl> 0, 488, 515, 312, 153, 184, 351, 594, 236, 559, 68, 1...
## $ `6/27/20` <dbl> 0, 498, 555, 317, 161, 188, 358, 600, 245, 561, 69, 2...
## $ `6/28/20` <dbl> 0, 503, 575, 317, 162, 189, 358, 602, 245, 561, 70, 2...
## $ `6/29/20` <dbl> 0, 527, 643, 322, 165, 199, 365, 605, 269, 585, 73, 2...
## $ `6/30/20` <dbl> 0, 537, 680, 325, 170, 208, 365, 607, 276, 590, 74, 2...
## $ `7/1/20` <dbl> 0, 553, 703, 326, 174, 218, 367, 607, 278, 595, 77, 2...
## $ `7/2/20` <dbl> 0, 561, 751, 335, 179, 222, 369, 610, 288, 611, 82, 2...
## $ `7/3/20` <dbl> 0, 568, 845, 348, 189, 230, 372, 625, 330, 625, 88, 2...
## $ `7/4/20` <dbl> 0, 591, 863, 350, 190, 234, 373, 626, 340, 637, 88, 2...
## $ `7/5/20` <dbl> 0, 615, 881, 352, 193, 239, 373, 634, 362, 642, 100, ...
## $ `7/6/20` <dbl> 0, 618, 911, 356, 197, 247, 373, 634, 384, 655, 105, ...
## $ `7/7/20` <dbl> 0, 644, 997, 360, 199, 255, 373, 634, 395, 656, 106, ...
## $ `7/8/20` <dbl> 0, 651, 1056, 366, 201, 262, 374, 639, 411, 660, 114,...
## $ `7/9/20` <dbl> 0, 661, 1131, 371, 211, 282, 375, 646, 445, 672, 115,...
## $ `7/10/20` <dbl> 0, 670, 1187, 381, 218, 292, 381, 648, 465, 679, 118,...
## $ `7/11/20` <dbl> 0, 684, 1224, 398, 224, 307, 382, 654, 500, 690, 128,...
## $ `7/12/20` <dbl> 0, 706, 1294, 403, 228, 331, 383, 655, 526, 693, 129,...
## $ `7/13/20` <dbl> 0, 728, 1359, 413, 231, 350, 383, 660, 566, 702, 136,...
## $ `7/14/20` <dbl> 0, 746, 1414, 428, 236, 366, 385, 661, 589, 712, 140,...
## $ `7/15/20` <dbl> 0, 756, 1518, 441, 242, 389, 386, 664, 655, 718, 145,...
## $ `7/16/20` <dbl> 0, 780, 1599, 459, 247, 424, 389, 669, 675, 731, 152,...
## $ `7/17/20` <dbl> 0, 789, 1689, 463, 255, 440, 393, 672, 720, 742, 157,...
## $ `7/18/20` <dbl> 0, 827, 1819, 483, 264, 458, 397, 678, 741, 756, 165,...
## $ `7/19/20` <dbl> 0, 842, 1937, 495, 269, 482, 398, 686, 785, 762, 173,...
## $ `7/20/20` <dbl> 0, 857, 2013, 503, 279, 507, 400, 689, 832, 767, 179,...
## $ `7/21/20` <dbl> 0, 865, 2102, 514, 283, 524, 401, 695, 869, 774, 182,...
## $ `7/22/20` <dbl> 0, 886, 2196, 518, 287, 547, 407, 701, 891, 782, 184,...
## $ `7/23/20` <dbl> 0, 905, 2461, 534, 289, 585, 408, 706, 934, 789, 193,...
## $ `7/24/20` <dbl> 0, 921, 2513, 539, 303, 615, 411, 711, 999, 797, 205,...
## $ `7/25/20` <dbl> 0, 932, 2662, 552, 318, 637, 414, 720, 1062, 810, 207...
## $ `7/26/20` <dbl> 0, 942, 2708, 562, 324, 646, 415, 724, 1113, 821, 209...
## $ `7/27/20` <dbl> 0, 965, 2770, 569, 334, 669, 416, 730, 1194, 825, 220...
## $ `7/28/20` <dbl> 0, 974, 2835, 575, 337, 675, 429, 734, 1243, 836, 221...
## $ `7/29/20` <dbl> 0, 974, 2835, 575, 338, 675, 429, 734, 1244, 836, 221...
## $ `7/30/20` <dbl> 0, 1002, 3028, 585, 352, 731, 435, 747, 1336, 848, 23...
## $ `7/31/20` <dbl> 0, 1015, 3101, 598, 363, 767, 437, 753, 1450, 859, 23...
## $ `8/1/20` <dbl> 0, 1030, 3142, 602, 368, 792, 443, 757, 1480, 861, 24...
## $ `8/2/20` <dbl> 0, 1052, 3223, 610, 372, 813, 445, 765, 1580, 868, 25...
## $ `8/3/20` <dbl> 0, 1066, 3265, 612, 382, 830, 446, 766, 1612, 875, 26...
## $ `8/4/20` <dbl> 0, 1073, 3320, 614, 389, 836, 449, 766, 1646, 882, 26...
## $ `8/5/20` <dbl> 0, 1073, 3380, 615, 392, 839, 452, 769, 1683, 886, 27...
## $ `8/6/20` <dbl> 0, 1096, 3438, 619, 421, 874, 458, 771, 1741, 893, 28...
## $ `8/7/20` <dbl> 0, 1113, 3504, 624, 424, 909, 462, 774, 1777, 899, 29...
## $ `8/8/20` <dbl> 0, 1134, 3564, 628, 434, 923, 471, 773, 1836, 904, 29...
## $ `8/9/20` <dbl> 0, 1215, 3606, 630, 446, 934, 472, 779, 1860, 906, 30...
## $ `8/10/20` <dbl> 0, 1215, 3714, 631, 450, 947, 474, 782, 1883, 909, 30...
## $ `8/11/20` <dbl> 0, 1215, 3736, 643, 455, 958, 489, 785, 1914, 916, 30...
## $ `8/12/20` <dbl> 0, 1241, 3776, 646, 464, 967, 500, 788, 1935, 918, 31...
## $ `8/13/20` <dbl> 0, 1250, 3813, 651, 469, 977, 501, 790, 1959, 919, 32...
## $ `8/14/20` <dbl> 0, 1252, 3860, 656, 477, 989, 502, 796, 1975, 922, 32...
## $ `8/15/20` <dbl> 0, 1262, 3909, 663, 483, 996, 503, 807, 2019, 925, 33...
## $ `8/16/20` <dbl> 0, 1273, 3948, 671, 483, 1005, 504, 811, 2037, 927, 3...
## $ `8/17/20` <dbl> 0, 1274, 3960, 672, 488, 1008, 504, 814, 2055, 928, 3...
## $ `8/18/20` <dbl> 0, 1291, 3977, 674, 490, 1034, 512, 814, 2107, 937, 3...
## $ `8/19/20` <dbl> 0, 1293, 4002, 683, 503, 1049, 530, 814, 2159, 941, 3...
## $ `8/20/20` <dbl> 0, 1293, 4035, 690, 507, 1077, 534, 814, 2214, 949, 3...
## $ `8/21/20` <dbl> 0, 1293, 4054, 690, 509, 1083, 534, 814, 2228, 952, 3...
## $ `8/22/20` <dbl> 0, 1322, 4115, 699, 516, 1096, 536, 822, 2276, 957, 3...
## $ `8/23/20` <dbl> 0, 1324, 4147, 702, 523, 1099, 536, 824, 2286, 958, 3...
## $ `8/24/20` <dbl> 0, 1351, 4167, 720, 526, 1135, 536, 825, 2327, 971, 3...
## $ `8/25/20` <dbl> 0, 1355, 4190, 724, 527, 1160, 536, 826, 2345, 973, 3...
## $ `8/26/20` <dbl> 0, 1366, 4265, 732, 530, 1195, 537, 833, 2400, 983, 3...
## $ `8/27/20` <dbl> 0, 1377, 4311, 739, 533, 1213, 538, 839, 2413, 1011, ...
## $ `8/28/20` <dbl> 0, 1389, 4347, 745, 535, 1219, 541, 840, 2443, 1017, ...
## $ `8/29/20` <dbl> 0, 1400, 4424, 753, 540, 1248, 546, 855, 2499, 1024, ...
## $ `8/30/20` <dbl> 0, 1438, 4525, 757, 550, 1277, 550, 864, 2533, 1027, ...
## $ `8/31/20` <dbl> 0, 1442, 4545, 757, 554, 1287, 551, 866, 2567, 1033, ...
## $ `9/1/20` <dbl> 0, 1452, 4568, 764, 558, 1303, 559, 871, 2619, 1041, ...
## $ `9/2/20` <dbl> 0, 1452, 4583, 768, 562, 1308, 561, 872, 2633, 1045, ...
## $ `9/3/20` <dbl> 0, 1466, 4628, 771, 564, 1336, 563, 874, 2678, 1046, ...
## $ `9/4/20` <dbl> 0, 1475, 4654, 776, 570, 1361, 563, 881, 2747, 1054, ...
## $ `9/5/20` <dbl> 0, 1492, 4686, 776, 576, 1376, 566, 886, 2830, 1059, ...
## $ `9/6/20` <dbl> 0, 1498, 4713, 777, 581, 1379, 568, 890, 2842, 1061, ...
## $ `9/7/20` <dbl> 0, 1504, 4730, 778, 583, 1384, 568, 892, 2877, 1063, ...
## $ `9/8/20` <dbl> 0, 1508, 4757, 778, 589, 1390, 568, 892, 2891, 1064, ...
## $ `9/9/20` <dbl> 0, 1522, 4787, 778, 591, 1401, 570, 895, 2907, 1068, ...
## $ `9/10/20` <dbl> 0, 1544, 4833, 785, 594, 1430, 572, 896, 2958, 1076, ...
## $ `9/11/20` <dbl> 0, 1551, 4886, 786, 602, 1441, 573, 896, 2988, 1088, ...
## $ `9/12/20` <dbl> 0, 1565, 4922, 792, 604, 1446, 574, 898, 3047, 1094, ...
## $ `9/13/20` <dbl> 0, 1576, 4959, 794, 607, 1453, 580, 899, 3093, 1094, ...
## $ `9/14/20` <dbl> 0, 1585, 4978, 801, 610, 1464, 580, 900, 3110, 1097, ...
## $ `9/15/20` <dbl> 0, 1601, 4992, 806, 611, 1475, 581, 901, 3127, 1102, ...
## Observations: 760,410
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumCases <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## Parsed with column specification:
## cols(
## .default = col_double(),
## `County Name` = col_character(),
## State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 242
## $ countyFIPS <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/26/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/27/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/28/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/29/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/30/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/31/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/1/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/2/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/3/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/4/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/5/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/6/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/7/20` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/8/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/9/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/10/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/11/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/12/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/13/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/14/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/15/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/16/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/17/20` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/18/20` <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/19/20` <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/20/20` <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/21/20` <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 13, 0, 0, 0, 1, 0, 0, 0, 1...
## $ `4/22/20` <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 16, 0, 0, 0, 1, 0, 1, 0, 1...
## $ `4/23/20` <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 16, 0, 1, 0, 1, 1, 1, 0, 1...
## $ `4/24/20` <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 17, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/25/20` <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/26/20` <dbl> 0, 2, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/27/20` <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/28/20` <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 19, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/29/20` <dbl> 0, 3, 2, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/30/20` <dbl> 0, 3, 3, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/1/20` <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/2/20` <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/3/20` <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/4/20` <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/5/20` <dbl> 0, 3, 5, 1, 0, 0, 0, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/6/20` <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/7/20` <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/8/20` <dbl> 0, 3, 5, 1, 1, 0, 1, 3, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/9/20` <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/10/20` <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/11/20` <dbl> 0, 3, 6, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/12/20` <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 21, 0, 1, 1, 1, 1, 1, 1, 2...
## $ `5/13/20` <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 22, 0, 1, 2, 1, 1, 1, 1, 2...
## $ `5/14/20` <dbl> 0, 3, 8, 1, 1, 0, 1, 8, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/15/20` <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/16/20` <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/17/20` <dbl> 0, 3, 8, 1, 1, 1, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/18/20` <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/19/20` <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/20/20` <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/21/20` <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/22/20` <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/23/20` <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/24/20` <dbl> 0, 3, 9, 1, 1, 1, 1, 12, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/25/20` <dbl> 0, 3, 9, 1, 1, 1, 3, 12, 3, 24, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/26/20` <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/27/20` <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/28/20` <dbl> 0, 3, 9, 1, 1, 1, 4, 15, 3, 24, 2, 1, 8, 2, 2, 1, 1, ...
## $ `5/29/20` <dbl> 0, 3, 9, 1, 1, 1, 4, 16, 3, 24, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/30/20` <dbl> 0, 4, 9, 1, 1, 1, 4, 17, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/31/20` <dbl> 0, 4, 9, 1, 1, 1, 5, 18, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `6/1/20` <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 25, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/2/20` <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/3/20` <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/4/20` <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/5/20` <dbl> 0, 5, 9, 1, 1, 1, 7, 21, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/6/20` <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/7/20` <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/8/20` <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/9/20` <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/10/20` <dbl> 0, 6, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/11/20` <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/12/20` <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/13/20` <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/14/20` <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/15/20` <dbl> 0, 6, 9, 1, 1, 1, 9, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/16/20` <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/17/20` <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/18/20` <dbl> 0, 8, 9, 1, 1, 1, 9, 25, 4, 26, 5, 3, 11, 4, 2, 1, 1,...
## $ `6/19/20` <dbl> 0, 8, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 11, 4, 2, 1, 1...
## $ `6/20/20` <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/21/20` <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/22/20` <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/23/20` <dbl> 0, 9, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 4, 2, 1, 1...
## $ `6/24/20` <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/25/20` <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/26/20` <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/27/20` <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/28/20` <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/29/20` <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/30/20` <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/1/20` <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/2/20` <dbl> 0, 13, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/3/20` <dbl> 0, 13, 10, 2, 1, 1, 10, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/4/20` <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/5/20` <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/6/20` <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/7/20` <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/8/20` <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/9/20` <dbl> 0, 14, 11, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/10/20` <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/11/20` <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/12/20` <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/13/20` <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/14/20` <dbl> 0, 18, 12, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/15/20` <dbl> 0, 19, 13, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/16/20` <dbl> 0, 20, 14, 3, 2, 1, 11, 32, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/17/20` <dbl> 0, 21, 14, 3, 2, 1, 11, 33, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/18/20` <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/19/20` <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/20/20` <dbl> 0, 21, 15, 4, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/21/20` <dbl> 0, 21, 16, 4, 2, 1, 11, 34, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/22/20` <dbl> 0, 21, 16, 4, 2, 1, 12, 34, 6, 34, 7, 5, 12, 8, 2, 1,...
## $ `7/23/20` <dbl> 0, 21, 17, 4, 2, 1, 12, 35, 6, 34, 7, 5, 12, 9, 2, 1,...
## $ `7/24/20` <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/25/20` <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/26/20` <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/27/20` <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/28/20` <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/29/20` <dbl> 0, 21, 21, 4, 2, 3, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/30/20` <dbl> 0, 21, 21, 5, 2, 3, 12, 36, 8, 38, 8, 6, 12, 9, 5, 1,...
## $ `7/31/20` <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/1/20` <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/2/20` <dbl> 0, 21, 23, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/3/20` <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/4/20` <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 12, 38, 8, 7, 12, 9, 5, 1...
## $ `8/5/20` <dbl> 0, 22, 24, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/6/20` <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/7/20` <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/8/20` <dbl> 0, 22, 26, 5, 5, 4, 12, 37, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/9/20` <dbl> 0, 22, 27, 5, 5, 4, 12, 37, 14, 38, 8, 8, 12, 9, 5, 1...
## $ `8/10/20` <dbl> 0, 22, 28, 5, 5, 4, 12, 37, 17, 38, 9, 9, 12, 10, 5, ...
## $ `8/11/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 19, 38, 9, 12, 12, 10, 5,...
## $ `8/12/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/13/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/14/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/15/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/16/20` <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/17/20` <dbl> 0, 23, 32, 6, 6, 5, 14, 37, 23, 38, 9, 12, 12, 10, 5,...
## $ `8/18/20` <dbl> 0, 23, 33, 6, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/19/20` <dbl> 0, 23, 33, 7, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/20/20` <dbl> 0, 23, 34, 7, 6, 5, 14, 37, 25, 38, 10, 12, 12, 10, 5...
## $ `8/21/20` <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 5...
## $ `8/22/20` <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/23/20` <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/24/20` <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 27, 38, 10, 13, 12, 11, 6...
## $ `8/25/20` <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 28, 39, 10, 13, 12, 11, 6...
## $ `8/26/20` <dbl> 0, 23, 36, 7, 6, 7, 14, 37, 28, 39, 10, 13, 12, 12, 6...
## $ `8/27/20` <dbl> 0, 23, 37, 7, 6, 7, 14, 37, 30, 39, 12, 13, 12, 13, 6...
## $ `8/28/20` <dbl> 0, 23, 39, 7, 6, 9, 14, 37, 32, 40, 12, 13, 12, 13, 6...
## $ `8/29/20` <dbl> 0, 23, 40, 7, 7, 9, 14, 37, 35, 40, 12, 13, 12, 14, 6...
## $ `8/30/20` <dbl> 0, 23, 40, 7, 7, 10, 14, 37, 35, 40, 12, 13, 12, 14, ...
## $ `8/31/20` <dbl> 0, 23, 42, 7, 8, 11, 14, 37, 36, 40, 12, 13, 12, 14, ...
## $ `9/1/20` <dbl> 0, 24, 42, 7, 8, 11, 14, 37, 38, 40, 12, 13, 12, 14, ...
## $ `9/2/20` <dbl> 0, 24, 42, 7, 8, 11, 14, 37, 38, 40, 12, 14, 12, 14, ...
## $ `9/3/20` <dbl> 0, 24, 44, 7, 8, 11, 14, 37, 38, 40, 12, 14, 12, 14, ...
## $ `9/4/20` <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/5/20` <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/6/20` <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/7/20` <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/8/20` <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/9/20` <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/10/20` <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 13, 16, 12, 14, ...
## $ `9/11/20` <dbl> 0, 24, 47, 7, 9, 12, 14, 38, 38, 41, 14, 17, 12, 15, ...
## $ `9/12/20` <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 14, 18, 12, 16, ...
## $ `9/13/20` <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 14, 18, 12, 16, ...
## $ `9/14/20` <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 14, 18, 12, 16, ...
## $ `9/15/20` <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 15, 24, 12, 16, ...
## Observations: 760,410
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumDeaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
# Comparison of total burden by date vs. previous file.
# bind_rows(.id = "source") tags each input with a *character* id ("1", "2"),
# so the comparisons below are done as strings — the original `source == 1`
# only worked via implicit numeric-to-character coercion.
bind_rows(burden_20200903, burden_20200917, .id = "source") %>%
  mutate(source = factor(
    case_when(
      source == "1" ~ "2020-09-03",
      source == "2" ~ "2020-09-17",
      TRUE ~ "Unknown"
    ),
    levels = c("2020-09-17", "2020-09-03", "Unknown")
  )) %>%
  group_by(source, date) %>%
  # .groups = "drop" returns an ungrouped frame (and silences the
  # "summarise() has grouped output" message); downstream steps do not
  # rely on residual grouping.
  summarize(cumDeaths = sum(cumDeaths), cumCases = sum(cumCases), .groups = "drop") %>%
  pivot_longer(-c(source, date)) %>%
  ggplot(aes(x = date, y = value / 1000, group = source, color = source)) +
  geom_line() +
  # Named-vector lookup relabels the pivoted metric names for the facet strips
  facet_wrap(~c("cumCases"="Cases", "cumDeaths"="Deaths")[name], scales = "free_y") +
  scale_x_date(date_breaks = "1 months", date_labels = "%m") +
  labs(y = "Burden (000s)", title = "US National Coronavirus Burden by Source")
# Plot burden data through 15-SEP-2020; minPop=10000 presumably restricts the
# plot to counties with population >= 10,000 — TODO confirm against plotBurdenData
plotBurdenData(burden_20200917, maxDate="2020-09-15", minPop=10000)
# Create filtered county data for 25k+ population using data through 15-SEP-2020.
# NOTE(review): the stricter minPop (25,000 vs 10,000 above) appears intentional
# for the clustering input — verify against createCountyFiltered's definition.
countyFiltered_20200917 <- createCountyFiltered(burden_20200917, maxDate="2020-09-15", minPop=25000)
##
## This should be 1, otherwise there are duplicates
## [1] 1
##
##
##
##
## Min and max should be the same if there are records everywhere on every day
## # A tibble: 1 x 4
## n_min population_min n_max population_max
## <dbl> <dbl> <dbl> <dbl>
## 1 3127 326429233 3127 326429233
##
##
##
## # A tibble: 1 x 6
## cpm_mean_is0 dpm_mean_is0 cpm_mean_lt100 dpm_mean_lt100 cpm_mean_lt5000
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0.0283 0 0.224 0.104
## # ... with 1 more variable: dpm_mean_lt5000 <dbl>
The data appear to be largely consistent from early September to mid-September. The existing segments are applied to the new data:
# Apply the existing cluster assignments (from countyCluster_km_test,
# presumably the k-means object fit earlier in the file -- per its name) to
# the new 2020-09-17 county data; the same filtered frame supplies both the
# population and the burden inputs
helper_test_20200917 <- helperAssessCountyClusters(countyCluster_km_test$objCluster$cluster,
dfPop=countyFiltered_20200917,
dfBurden=countyFiltered_20200917,
thruLabel="Sep 15, 2020",
plotsTogether=TRUE,
showMap=TRUE,
clusterPlotsTogether=TRUE,
orderCluster=TRUE
)
##
## Recency is defined as 2020-08-17 through current
##
## Recency is defined as 2020-08-17 through current
## Warning: Removed 1 rows containing missing values (geom_point).
The segments appear to be reasonable with the new county-level data. Next steps are to check the alignment of peaks (shapes) in cases and deaths by national and sub-national levels.
As an initial exploratory step, the curves for cases and deaths by cluster are superimposed, with the scale modified such that 100 represents the 7-day peak of the data:
# Cluster-level daily burden: population-weighted means of the per-capita
# 7-day rates (cpm7/dpm7 -- per the cpm/dpm naming used earlier, presumably
# cases/deaths per million), then each metric rescaled so that 100 is the
# cluster's own maximum
clusterNorm <- helper_test_20200917 %>%
group_by(cluster, date) %>%
# pop=sum(pop) is evaluated last, so the weighted means use the original pop
summarize(cpm7=sum(pop*cpm7)/sum(pop), dpm7=sum(pop*dpm7)/sum(pop), pop=sum(pop)) %>%
group_by(cluster) %>%
mutate(caseNorm=100*cpm7/max(cpm7, na.rm=TRUE), deathNorm=100*dpm7/max(dpm7, na.rm=TRUE)) %>%
ungroup()
# Overlay the normalized case and death curves, one facet per cluster
clusterNorm %>%
  select(cluster, date, caseNorm, deathNorm) %>%
  pivot_longer(-c(cluster, date)) %>%
  filter(!is.na(value)) %>%
  # Translate the pivoted column names into legend labels up front
  mutate(metric=c("deathNorm"="Normalized Death", "caseNorm"="Normalized Cases")[name]) %>%
  ggplot(aes(x=date, y=value, color=metric, group=name)) +
  geom_line() +
  facet_wrap(~cluster) +
  scale_color_discrete("Metric") +
  scale_x_date(date_breaks="1 months", date_labels="%m") +
  labs(x="2020",
       y="Normalized Burden",
       title="Burden by Segment",
       subtitle="Normalized (100 is segment maximum for metric)"
  )
There are significant differences in behavior between the early epidemic and the late epidemic:
Next steps are to begin quantifying the lag factor and ratio, focused for now on the cluster-level data
Suppose that the “early pandemic” is defined as March-May and an attempt is made to best line up the shapes of the curves during that time period:
# Same overlay as above, restricted to the early-pandemic window (Mar-May)
clusterNorm %>%
  select(cluster, date, caseNorm, deathNorm) %>%
  pivot_longer(-c(cluster, date)) %>%
  filter(!is.na(value), date>="2020-03-01", date<="2020-05-31") %>%
  # Translate the pivoted column names into legend labels up front
  mutate(metric=c("deathNorm"="Normalized Death", "caseNorm"="Normalized Cases")[name]) %>%
  ggplot(aes(x=date, y=value, color=metric, group=name)) +
  geom_line() +
  facet_wrap(~cluster) +
  scale_color_discrete("Metric") +
  scale_x_date(date_breaks="1 months", date_labels="%m") +
  labs(x="2020",
       y="Normalized Burden",
       title="Burden by Segment (March 1 - May 31)",
       subtitle="Normalized (100 is segment maximum for metric)"
  )
# Early-pandemic (March-May) subset of the cluster-level rates, complete
# cases only; input to the lag-correlation analysis below
clusterMarchMay <- clusterNorm %>%
select(cluster, date, cpm7, dpm7) %>%
filter(!is.na(cpm7), !is.na(dpm7), date>="2020-03-01", date<="2020-05-31")
# Helper: for a single lag (in days), compute the within-cluster correlation
# between lagged cases and deaths.
#
# lagDays: integer, number of days to shift the case column
# df: input data (defaults to the March-May cluster subset above)
# x: string name of the column to lag (cases, cpm7)
# y: string name of the column correlated against (deaths, dpm7)
# Returns a tibble with one row per cluster: cluster, corr, lag.
helperCorrel <- function(lagDays, df=clusterMarchMay, x="cpm7", y="dpm7") {
  df %>%
    group_by(cluster) %>%
    # arrange() sorts the whole frame by date; rows remain date-ordered
    # within each cluster, which is what lag() needs
    arrange(date) %>%
    # .data[[x]] resolves x strictly as a column; get(x) could silently pick
    # up an object of the same name from the calling environment instead
    mutate(xlag=lag(.data[[x]], lagDays)) %>%
    summarize(corr=cor(xlag, .data[[y]], use="complete.obs")) %>%
    mutate(lag=lagDays) %>%
    ungroup()
}
# Sweep lags of 0-30 days, stacking the per-cluster correlation rows
corrLagEarly <- map_dfr(0:30, helperCorrel)
# One correlation-vs-lag curve per cluster
corrLagEarly %>%
  ggplot(aes(x=lag, y=corr, group=cluster, color=cluster)) +
  geom_line() +
  facet_wrap(~cluster)
The two segments with early disease show stronger correlations between deaths and cases with a bit of a lag. The best lag time for each cluster is identified:
# For each cluster, keep the lag with the highest case-death correlation
# (corr==max(corr) is safe here despite being a float comparison: max()
# returns an element of the same vector, so the match is exact)
bestLag <- corrLagEarly %>%
group_by(cluster) %>%
filter(corr==max(corr)) %>%
ungroup()
bestLag
## # A tibble: 5 x 3
## cluster corr lag
## <fct> <dbl> <int>
## 1 5 0.971 2
## 2 2 0.809 4
## 3 1 0.937 5
## 4 4 0.972 7
## 5 3 0.989 8
There is roughly a 1-week lag between cases and deaths in the early pandemic. The ratio of deaths to cases can then be calculated on the lag data:
# Apply each cluster's best lag to the early-pandemic data and plot the
# implied death-to-lagged-case ratios
clusterMarchMay %>%
inner_join(bestLag, by="cluster") %>%
group_by(cluster) %>%
arrange(date) %>%
# lag is constant within a cluster after the join; min() collapses it to the
# scalar shift that lag() requires
mutate(cpmlag=lag(cpm7, min(lag))) %>%
filter(!is.na(cpmlag)) %>%
# ratCur: same-day deaths vs. lagged cases; ratCum: cumulative ratio
mutate(ratCur=dpm7/cpmlag, ratCum=cumsum(dpm7)/cumsum(cpmlag)) %>%
ggplot(aes(x=date, group=cluster, color=cluster)) +
geom_line(aes(y=ratCum)) +
geom_line(aes(y=ratCur), lty=2) +
labs(x="",
y="Cumulative Death vs. Case Ratio",
title="Deaths vs. Cases with Lag (Early Pandemic)",
subtitle="Solid line is cumulative, dashed line is current time period"
) +
# Label each cluster's final cumulative ratio as a percentage
geom_text(data=~filter(., date==max(date)),
aes(label=paste0(round(100*ratCum, 1), "%"), y=ratCum+0.02)
) +
facet_wrap(~cluster)
In the harder-hit early clusters, there was roughly a one-week lag between cases and deaths, and deaths ran between 5% and 10% of cases. The segments that were not hit as heavily generally showed a lower number of deaths relative to cases.
Next steps are to generalize the approach so that it can be applied to different geographies and timings:
# Helper function to make normalized data for cases and deaths
#
# Aggregates the per-capita 7-day rates (cpm7/dpm7) to the aggBy level as
# population-weighted means, then rescales each metric within each group so
# that 100 is the group maximum. Returns the aggregated tibble with columns
# aggBy, date, cpm7, dpm7, pop, caseNorm, deathNorm; optionally prints a
# faceted plot of the normalized curves as a side effect.
#
# NOTE: the arithmetic mirrors the ad hoc clusterNorm computation above
# exactly (see the identical() check after this function) -- do not "clean
# up" the formulas without updating that check.
helperMakeNormData <- function(df,
aggBy="cluster",
plotData=TRUE
) {
# FUNCTION ARGUMENTS:
# df: data frame or tibble
# aggBy: variable for aggregation for the normalized data
# plotData: boolean, whether to plot the data
# Create the normalized data
normData <- df %>%
group_by_at(vars(all_of(c(aggBy, "date")))) %>%
# pop=sum(pop) is evaluated last, so the weighted means use the input pop
summarize(cpm7=sum(pop*cpm7)/sum(pop), dpm7=sum(pop*dpm7)/sum(pop), pop=sum(pop)) %>%
group_by_at(vars(all_of(aggBy))) %>%
mutate(caseNorm=100*cpm7/max(cpm7, na.rm=TRUE), deathNorm=100*dpm7/max(dpm7, na.rm=TRUE)) %>%
ungroup()
# Plot the normalized data (if requested)
if (plotData) {
# Create the plotting object
p1 <- normData %>%
select(all_of(aggBy), date, caseNorm, deathNorm) %>%
pivot_longer(-c(all_of(aggBy), "date")) %>%
filter(!is.na(value)) %>%
ggplot(aes(x=date,
y=value,
color=c("deathNorm"="Normalized Death", "caseNorm"="Normalized Cases")[name],
group=name
)
) +
geom_line() +
# get(aggBy) resolves the aggregation column from its string name
facet_wrap(~get(aggBy)) +
scale_color_discrete("Metric") +
scale_x_date(date_breaks="1 months", date_labels="%m") +
labs(x="2020",
y="Normalized Burden",
title="Burden by Segment",
subtitle="Normalized (100 is segment maximum for metric)"
)
print(p1)
}
# Return the normalized data
normData
}
# Regression check: the generalized helper must reproduce the earlier ad hoc
# clusterNorm computation exactly (identical() is an exact comparison, so
# even the arithmetic must match)
cNorm_002 <- helperMakeNormData(helper_test_20200917)
identical(cNorm_002, clusterNorm)
## [1] TRUE
Subsets of the data can then be taken, with lags calculated:
# Helper function to assess the correlations by lag
#
# For a single lag value, computes the correlation between the lagged x
# column (cases) and the y column (deaths) within each aggBy group.
#
# lagDays: integer, number of days to shift column x
# df: data frame or tibble with columns aggBy, date, x, y
# aggBy: character name(s) of the grouping variable(s)
# x / y: string names of the case and death columns
# Returns a tibble with one row per group: aggBy column(s), corr, lag.
helperCorrel <- function(lagDays, df, aggBy="cluster", x="cpm7", y="dpm7") {
  df %>%
    group_by_at(vars(all_of(aggBy))) %>%
    # Sort by date so lag() shifts by calendar order within each group
    arrange(date) %>%
    # .data[[x]] resolves x strictly as a column; get(x) could silently pick
    # up an object of the same name from the calling environment instead
    mutate(xlag=lag(.data[[x]], lagDays)) %>%
    summarize(corr=cor(xlag, .data[[y]], use="complete.obs")) %>%
    mutate(lag=lagDays) %>%
    ungroup()
}
# Function to test lags and produce subsetted frame
#
# Subsets df to the [minDate, maxDate] window, sweeps case-to-death lags of
# 0-30 days within each aggBy group, identifies the best-correlating lag per
# group, and returns the windowed data joined with that best lag plus a
# cpmlag column (cpm7 shifted by the group's best lag). Prints the
# normalized-curve, correlation-by-lag, and lagged death/case ratio plots
# as side effects.
helperTestLags <- function(df,
                           minDate=NULL,
                           maxDate=NULL,
                           aggBy="cluster",
                           plotData=TRUE
) {
  # FUNCTION ARGUMENTS:
  # df: data frame or tibble with columns aggBy, date, cpm7, dpm7
  # minDate: the minimum date to use for analysis (NULL means use data minimum)
  # maxDate: the maximum date to use for analysis (NULL means use data maximum)
  # aggBy: the aggregation level for the input data in df
  # plotData: boolean, whether to plot the subset of the data
  # Get the minimum and/or maximum date from the data if passed as NULL
  if (is.null(minDate)) minDate <- df %>% pull(date) %>% min(na.rm=TRUE) else minDate <- as.Date(minDate)
  if (is.null(maxDate)) maxDate <- df %>% pull(date) %>% max(na.rm=TRUE) else maxDate <- as.Date(maxDate)
  # Filter the data to only the desired columns and time periods
  filterData <- df %>%
    select(all_of(aggBy), date, cpm7, dpm7) %>%
    filter(!is.na(cpm7), !is.na(dpm7), date>=minDate, date<=maxDate)
  # Human-readable window label reused in plot titles
  windowLabel <- paste0(format(minDate, "%b %d"), " - ", format(maxDate, "%b %d"))
  # Create the plot data if requested
  if (plotData) {
    # Normalize each metric to 100 at its within-window, within-group maximum
    p1 <- filterData %>%
      pivot_longer(-c(all_of(aggBy), date)) %>%
      group_by_at(vars(all_of(c(aggBy, "name")))) %>%
      mutate(normValue=100*value/max(value)) %>%
      filter(!is.na(normValue)) %>%
      ggplot(aes(x=date,
                 y=normValue,
                 color=c("dpm7"="Normalized Death", "cpm7"="Normalized Cases")[name],
                 group=name
      )
      ) +
      geom_line() +
      facet_wrap(~get(aggBy)) +
      scale_color_discrete("Metric") +
      scale_x_date(date_breaks="1 months", date_labels="%m") +
      labs(x="2020",
           y="Normalized Burden",
           title=paste0("Burden by Segment (", windowLabel, ")"),
           subtitle="Normalized (100 is segment maximum for metric during time period)"
      )
    print(p1)
  }
  # Test the various lags for correlations
  # BUG FIX: forward aggBy so a non-default aggregation level is respected
  # (previously helperCorrel silently fell back to its "cluster" default)
  corrLag <- map_dfr(.x=0:30, .f=helperCorrel, df=filterData, aggBy=aggBy)
  # Plot of correlations
  p2 <- corrLag %>%
    ggplot(aes(x=lag, y=corr)) +
    geom_line(aes(group=get(aggBy), color=get(aggBy))) +
    facet_wrap(~get(aggBy)) +
    labs(x="Lag (days) for cases",
         y="Correlation to deaths",
         title="Correlation of lagged cases and deaths"
    )
  print(p2)
  # Find the best lags for each aggBy
  bestLag <- corrLag %>%
    group_by_at(vars(all_of(aggBy))) %>%
    filter(corr==max(corr)) %>%
    ungroup()
  cat("\nThe best lags are:\n")
  print(bestLag)
  # Create the lagged database
  lagData <- filterData %>%
    inner_join(bestLag, by=aggBy) %>%
    group_by_at(vars(all_of(aggBy))) %>%
    arrange(date) %>%
    # lag is constant within each group after the join; min() collapses it
    # to the scalar shift that lag() requires
    mutate(cpmlag=lag(cpm7, min(lag)))
  # Create the plot of the lagged database
  # FIX: title now reflects the actual analysis window rather than the
  # hard-coded "(Early Pandemic)" label, which was wrong for late windows
  p3 <- lagData %>%
    filter(!is.na(cpmlag)) %>%
    mutate(ratCur=dpm7/cpmlag, ratCum=cumsum(dpm7)/cumsum(cpmlag)) %>%
    ggplot(aes(x=date, group=get(aggBy), color=get(aggBy))) +
    geom_line(aes(y=ratCum)) +
    geom_line(aes(y=ratCur), lty=2) +
    labs(x="",
         y="Cumulative Death vs. Case Ratio",
         title=paste0("Deaths vs. Cases with Lag (", windowLabel, ")"),
         subtitle="Solid line is cumulative, dashed line is current time period"
    ) +
    # Label each group's final cumulative ratio, nudged left to stay in frame
    geom_text(data=~filter(., date==max(date)),
              aes(label=paste0(round(100*ratCum, 1), "%"), y=ratCum+0.02, x=date-lubridate::days(10))
    ) +
    facet_wrap(~get(aggBy))
  print(p3)
  # Return the lagged data
  lagData
}
# Create for early pandemic
# (March-May window; reproduces the ad hoc bestLag analysis above)
lagData_early_002 <- helperTestLags(cNorm_002, minDate="2020-03-01", maxDate="2020-05-31")
##
## The best lags are:
## # A tibble: 5 x 3
## cluster corr lag
## <fct> <dbl> <int>
## 1 5 0.971 2
## 2 2 0.809 4
## 3 1 0.937 5
## 4 4 0.972 7
## 5 3 0.989 8
# Create for late pandemic
# (June-August window)
lagData_late_002 <- helperTestLags(cNorm_002, minDate="2020-06-01", maxDate="2020-08-31")
##
## The best lags are:
## # A tibble: 5 x 3
## cluster corr lag
## <fct> <dbl> <int>
## 1 2 0.802 6
## 2 4 0.897 17
## 3 1 0.971 21
## 4 5 0.518 23
## 5 3 0.467 26
Lags for segments with late disease appear to be in the 3-week range, suggesting that cases may be diagnosed earlier in the late pandemic. Fatality rates are generally much lower in the 'late disease' segments also, in the 1%-2% range rather than the previously observed 5%-10% range. This is consistent with expanded testing finding more cases of mild disease, a hypothesis that has been publicly floated.
Next steps are to drill down to various levels such as specific counties.